From b752fd336c35e73db6cc462fe5ed94bd73f52cc5 Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Thu, 23 Jan 2020 12:12:13 -0800
Subject: [PATCH] HBASE-23279 : Set ROW_INDEX_V1 as default DataBlock encoding

---
 .../hadoop/hbase/HColumnDescriptor.java        |  3 +-
 .../client/ColumnFamilyDescriptorBuilder.java  | 14 ++--
 .../hbase/io/encoding/DataBlockEncoding.java   |  3 +
 .../hadoop/hbase/io/hfile/HFileContext.java    |  2 +-
 .../hbase/io/hfile/HFileContextBuilder.java    |  2 +-
 .../io/hfile/HFileDataBlockEncoderImpl.java    |  4 +-
 .../io/hfile/bucket/BucketAllocator.java       |  1 +
 .../hbase/io/hfile/TestHFileWriterV3.java      | 78 +++++++------------
 .../mob/compactions/TestMobCompactor.java      |  3 +-
 9 files changed, 44 insertions(+), 66 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
index db2b4f5362..43cbab8324 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
@@ -80,7 +80,8 @@ public class HColumnDescriptor implements ColumnFamilyDescriptor, Comparable<HColumnDescriptor> {
-        n -> DataBlockEncoding.valueOf(n.toUpperCase()), DataBlockEncoding.NONE);
+        n -> DataBlockEncoding.valueOf(n.toUpperCase()), DataBlockEncoding
+            .DEFAULT_DATA_BLOCK_ENCODING);
     }
 
     /**
@@ -807,7 +804,8 @@ public class ColumnFamilyDescriptorBuilder {
      * @return this (for chained invocation)
      */
     public ModifyableColumnFamilyDescriptor setDataBlockEncoding(DataBlockEncoding type) {
-      return setValue(DATA_BLOCK_ENCODING_BYTES, type == null ? DataBlockEncoding.NONE.name() : type.name());
+      return setValue(DATA_BLOCK_ENCODING_BYTES, type == null ?
+        DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING.name() : type.name());
     }
 
     /**
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
index 335488b075..8a19b8d406 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
@@ -79,6 +79,9 @@ public enum DataBlockEncoding {
     this.encoderCls = encoderClsName;
   }
 
+  public static final DataBlockEncoding DEFAULT_DATA_BLOCK_ENCODING =
+    DataBlockEncoding.ROW_INDEX_V1;
+
   /**
    * @return name converted to bytes.
    */
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
index ea4782d035..8a2d65bc9b 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
@@ -63,7 +63,7 @@ public class HFileContext implements HeapSize, Cloneable {
   private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
   /** Number of uncompressed bytes we allow per block. */
   private int blocksize = HConstants.DEFAULT_BLOCKSIZE;
-  private DataBlockEncoding encoding = DataBlockEncoding.NONE;
+  private DataBlockEncoding encoding = DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING;
   /** Encryption algorithm and key used */
   private Encryption.Context cryptoContext = Encryption.Context.NONE;
   private long fileCreateTime;
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
index a44f273c58..cab4285ce8 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
@@ -49,7 +49,7 @@ public class HFileContextBuilder {
   private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
   /** Number of uncompressed bytes we allow per block. */
   private int blocksize = HConstants.DEFAULT_BLOCKSIZE;
-  private DataBlockEncoding encoding = DataBlockEncoding.NONE;
+  private DataBlockEncoding encoding = DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING;
   /** Crypto context */
   private Encryption.Context cryptoContext = Encryption.Context.NONE;
   private long fileCreateTime = 0;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
index 347b1f3c59..8fa9f86514 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
@@ -42,7 +42,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
    * @param encoding What kind of data block encoding will be used.
    */
   public HFileDataBlockEncoderImpl(DataBlockEncoding encoding) {
-    this.encoding = encoding != null ? encoding : DataBlockEncoding.NONE;
+    this.encoding = encoding != null ? encoding : DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING;
   }
 
   public static HFileDataBlockEncoder createFromFileInfo(
@@ -129,7 +129,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
   @Override
   public void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out)
       throws IOException {
-    if (this.encoding != null && this.encoding != DataBlockEncoding.NONE) {
+    if (this.encoding != DataBlockEncoding.NONE) {
       this.encoding.getEncoder().startBlockEncoding(encodingCtx, out);
     }
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
index 2883ff2f16..7784a2184d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
@@ -305,6 +305,7 @@ public final class BucketAllocator {
   BucketAllocator(long availableSpace, int[] bucketSizes)
       throws BucketAllocatorException {
     this.bucketSizes = bucketSizes == null ? DEFAULT_BUCKET_SIZES : bucketSizes;
+    LOG.info("BucketSizes during Bucket Allocation: {}", this.bucketSizes);
     Arrays.sort(this.bucketSizes);
     this.bigItemSize = Ints.max(this.bucketSizes);
     this.bucketCapacity = FEWEST_ITEMS_IN_BUCKET * (long) bigItemSize;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
index 3cec836893..a2de896ef8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
@@ -19,10 +19,7 @@ package org.apache.hadoop.hbase.io.hfile;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
 
-import java.io.ByteArrayInputStream;
-import java.io.DataInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -36,6 +33,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ArrayBackedTag;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -46,13 +44,14 @@ import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
+import org.apache.hadoop.hbase.io.encoding.RowIndexSeekerV1;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.WritableUtils;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -126,7 +125,9 @@ public class TestHFileWriterV3 {
     HFileContext context = new HFileContextBuilder()
       .withBlockSize(4096)
       .withIncludesTags(useTags)
-      .withCompression(compressAlgo).build();
+      .withCompression(compressAlgo)
+      .withCellComparator(CellComparatorImpl.COMPARATOR)
+      .build();
     CacheConfig cacheConfig = new CacheConfig(conf);
     HFile.Writer writer = new HFile.WriterFactory(conf, cacheConfig)
       .withPath(fs, hfilePath)
@@ -178,8 +179,9 @@ public class TestHFileWriterV3 {
     assertEquals(entryCount, trailer.getEntryCount());
     HFileContext meta = new HFileContextBuilder()
       .withCompression(compressAlgo)
-      .withIncludesMvcc(false)
+      .withIncludesMvcc(true)
       .withIncludesTags(useTags)
+      .withDataBlockEncoding(DataBlockEncoding.ROW_INDEX_V1)
       .withHBaseCheckSum(true).build();
     ReaderContext readerContext = new ReaderContextBuilder()
       .withInputStreamWrapper(new FSDataInputStreamWrapper(fsdis))
@@ -227,62 +229,34 @@ public class TestHFileWriterV3 {
     HFileInfo fileInfo = new HFileInfo();
     fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     byte [] keyValueFormatVersion = fileInfo.get(HFileWriterImpl.KEY_VALUE_VERSION);
-    boolean includeMemstoreTS = keyValueFormatVersion != null &&
-        Bytes.toInt(keyValueFormatVersion) > 0;
 
     // Counters for the number of key/value pairs and the number of blocks
     int entriesRead = 0;
     int blocksRead = 0;
-    long memstoreTS = 0;
 
     // Scan blocks the way the reader would scan them
     fsdis.seek(0);
     long curBlockPos = 0;
     while (curBlockPos <= trailer.getLastDataBlockOffset()) {
+      HFileBlockDecodingContext ctx = blockReader.getBlockDecodingContext();
       HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true)
         .unpack(context, blockReader);
-      assertEquals(BlockType.DATA, block.getBlockType());
-      ByteBuff buf = block.getBufferWithoutHeader();
-      int keyLen = -1;
-      while (buf.hasRemaining()) {
-
-        keyLen = buf.getInt();
-
-        int valueLen = buf.getInt();
-
-        byte[] key = new byte[keyLen];
-        buf.get(key);
-
-        byte[] value = new byte[valueLen];
-        buf.get(value);
-        byte[] tagValue = null;
-        if (useTags) {
-          int tagLen = ((buf.get() & 0xff) << 8) ^ (buf.get() & 0xff);
-          tagValue = new byte[tagLen];
-          buf.get(tagValue);
-        }
-
-        if (includeMemstoreTS) {
-          ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(), buf.arrayOffset()
-              + buf.position(), buf.remaining());
-          DataInputStream data_input = new DataInputStream(byte_input);
-
-          memstoreTS = WritableUtils.readVLong(data_input);
-          buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS));
-        }
-
-        // A brute-force check to see that all keys and values are correct.
-        KeyValue kv = keyValues.get(entriesRead);
-        assertTrue(Bytes.compareTo(key, kv.getKey()) == 0);
-        assertTrue(Bytes.compareTo(value, 0, value.length, kv.getValueArray(), kv.getValueOffset(),
-          kv.getValueLength()) == 0);
-        if (useTags) {
-          assertNotNull(tagValue);
-          KeyValue tkv = kv;
-          assertEquals(tagValue.length, tkv.getTagsLength());
-          assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getTagsArray(),
-            tkv.getTagsOffset(), tkv.getTagsLength()) == 0);
-        }
+      assertEquals(BlockType.ENCODED_DATA, block.getBlockType());
+      ByteBuff origBlock = block.getBufferReadOnly();
+      int pos = block.headerSize() + DataBlockEncoding.ID_SIZE;
+      origBlock.position(pos);
+      origBlock.limit(pos + block.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE);
+      ByteBuff buf = origBlock.slice();
+      RowIndexSeekerV1 seeker = new RowIndexSeekerV1(ctx);
+      seeker.setCurrentBuffer(buf);
+      Cell res = seeker.getCell();
+      KeyValue kv = keyValues.get(entriesRead);
+      assertEquals(0, CellComparatorImpl.COMPARATOR.compare(res, kv));
+      ++entriesRead;
+      while(seeker.next()) {
+        res = seeker.getCell();
+        kv = keyValues.get(entriesRead);
+        assertEquals(0, CellComparatorImpl.COMPARATOR.compare(res, kv));
         ++entriesRead;
       }
       ++blocksRead;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/compactions/TestMobCompactor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/compactions/TestMobCompactor.java
index b8e3ce0d44..60405fc114 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/compactions/TestMobCompactor.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/compactions/TestMobCompactor.java
@@ -325,7 +325,8 @@ public class TestMobCompactor {
       * (cellNumPerRow * rowNumPerRegion - delCellNum), countMobCells(table));
     // After the compaction, the files smaller than the mob compaction merge size
     // is merge to one file
-    assertEquals("After compaction: family1 mob file count", largeFilesCount + regionNum,
+    // after enabling default encoding as ROW_INDEX_V1, expected file count reduced by regionNum
+    assertEquals("After compaction: family1 mob file count", largeFilesCount,
       countFiles(tableName, true, family1));
     assertEquals("After compaction: family2 mob file count", regionNum * count,
       countFiles(tableName, true, family2));
--
2.17.2 (Apple Git-113)
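
Usage illustration (not part of the committed patch): since getDataBlockEncoding() and setDataBlockEncoding() now fall back to DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING, a column family created without an explicit encoding reports ROW_INDEX_V1, and callers that want unencoded data blocks must opt out explicitly. A minimal sketch against the standard hbase-client builder API; the class name DefaultEncodingSketch is made up, and ColumnFamilyDescriptorBuilder.newBuilder / Bytes.toBytes are existing client calls that do not appear in this diff.

import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class DefaultEncodingSketch {
  public static void main(String[] args) {
    // No encoding specified: with this patch the descriptor falls back to
    // DataBlockEncoding.DEFAULT_DATA_BLOCK_ENCODING, i.e. ROW_INDEX_V1.
    ColumnFamilyDescriptor cf =
      ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf")).build();
    System.out.println(cf.getDataBlockEncoding());    // ROW_INDEX_V1

    // Explicit opt-out keeps unencoded data blocks, matching the pre-patch default.
    ColumnFamilyDescriptor plain = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
      .setDataBlockEncoding(DataBlockEncoding.NONE)
      .build();
    System.out.println(plain.getDataBlockEncoding()); // NONE
  }
}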