From 58613ccd56038b605c0df8cf3ae6cd98a899fce5 Mon Sep 17 00:00:00 2001
From: Ben Lau
Date: Fri, 21 Aug 2015 02:32:32 -0700
Subject: [PATCH] Fix seekBefore()

---
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java     |  51 +++++
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  21 +-
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |  14 ++
 .../hadoop/hbase/client/TestFromClientSide.java    |  96 ++++++++++
 .../hadoop/hbase/io/hfile/TestSeekBefore.java      | 201 ++++++++++++++++++++
 5 files changed, 382 insertions(+), 1 deletion(-)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBefore.java

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index 6f800c2..57e0cd4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -26,6 +26,7 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicReference;
@@ -663,6 +664,56 @@ public class HFileBlockIndex {
      * Key to find
      */
    public abstract int rootBlockContainingKey(final Cell key);
+
+
+    /**
+     * Calculate the number of bytes taken up by the potential leaf index block between two
+     * consecutive data blocks, data block 1 and data block 2. Returns -1 if this function
+     * cannot determine the answer.
+     */
+    public long calculateLeafIndexBlockSpaceBetweenConsecutiveDataBlocks(long dataBlockOffset1,
+        long dataBlockOffset2) {
+
+      if (dataBlockOffset1 > dataBlockOffset2) {
+        throw new IllegalArgumentException("Data block offset 1 > data block offset 2: "
+            + dataBlockOffset1 + " , " + dataBlockOffset2);
+      }
+
+      // A single-level index has no leaf index blocks at all. An index with 3+ levels does
+      // not keep leaf index block offset information in memory at the root, so we cannot tell.
+      if (searchTreeLevel == 1) {
+        return 0;
+      } else if (searchTreeLevel != 2) {
+        return -1;
+      }
+
+      // We should not find an exact match in the block offset array, because the offsets
+      // stored there belong to index blocks, not data blocks
+      int binSearchPos = Arrays.binarySearch(blockOffsets, dataBlockOffset1);
+      if (binSearchPos >= 0) {
+        throw new IllegalArgumentException("Data block offset was provided that is actually an "
+            + "index block offset: " + dataBlockOffset1);
+      }
+
+      int insertPos = (binSearchPos + 1) * -1;
+      int indexBlocksFound = 0;
+      long totalIndexBlockSize = 0;
+      for (int i = insertPos; i < blockOffsets.length; i++) {
+        long indexBlockOffset = blockOffsets[i];
+        if (dataBlockOffset1 < indexBlockOffset && indexBlockOffset < dataBlockOffset2) {
+          totalIndexBlockSize += blockDataSizes[i];
+          indexBlocksFound++;
+        } else {
+          break;
+        }
+      }
+
+      if (indexBlocksFound > 1) {
+        throw new IllegalStateException("Found more than 1 index block between 2 consecutive "
+            + "data blocks");
+      }
+
+      return totalIndexBlockSize;
+    }
 
     /**
      * The indexed key at the ith position in the nonRootIndex. The position starts at 0.
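
A note on the insertion-point arithmetic above: Arrays.binarySearch returns -(insertionPoint) - 1 when the key is absent, so insertPos = (binSearchPos + 1) * -1 recovers the index of the first root-table entry whose offset is greater than dataBlockOffset1. Below is a minimal standalone sketch of that step; the class name and every offset are invented for illustration and are not part of the patch:

import java.util.Arrays;

public class InsertionPointDemo {
  public static void main(String[] args) {
    // Hypothetical leaf index block offsets kept at the root of a 2-level index.
    long[] blockOffsets = { 4096L, 12288L, 20480L };

    // A data block offset never equals an index block offset, so binarySearch
    // misses and returns -(insertionPoint) - 1 per its contract.
    long dataBlockOffset1 = 8192L;
    long dataBlockOffset2 = 16384L;

    int binSearchPos = Arrays.binarySearch(blockOffsets, dataBlockOffset1); // -2
    int insertPos = (binSearchPos + 1) * -1;                                // 1

    // blockOffsets[1] == 12288 is the first index block past dataBlockOffset1,
    // and it also lies before dataBlockOffset2, so its on-disk size is the
    // space the loop in the patch would subtract.
    System.out.println("insertPos=" + insertPos + ", offset=" + blockOffsets[insertPos]);
  }
}
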
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 18b9b64..b7bf32c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -843,11 +843,30 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       // seekBefore key. We will go ahead by reading the next block that satisfies the
       // given key. Return the current block before reading the next one.
       reader.returnBlock(seekToBlock);
+
+      long prevBlockSize = seekToBlock.getOffset() - previousBlockOffset;
+
+      // There can be an index block between 2 consecutive data blocks.
+      // Figure out whether there is one and subtract out that index block's space.
+      // If we can't figure it out, set the size to -1 so that reader.readBlock()
+      // will determine it from the block header. This isn't efficient and is
+      // a temporary bug fix to be improved in a future version of HBase
+      // using a new HFile format.
+      long prevIndexBlockSpace = this.reader.getDataBlockIndexReader()
+          .calculateLeafIndexBlockSpaceBetweenConsecutiveDataBlocks(
+              previousBlockOffset, seekToBlock.getOffset());
+
+      if (prevIndexBlockSpace < 0) {
+        prevBlockSize = -1;
+      } else {
+        prevBlockSize -= prevIndexBlockSpace;
+      }
+
       // It is important that we compute and pass onDiskSize to the block
       // reader so that it does not have to read the header separately to
       // figure out the size.
       seekToBlock = reader.readBlock(previousBlockOffset,
-          seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
+          prevBlockSize, cacheBlocks,
           pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
       // TODO shortcut: seek forward in this block to the last key of the
       // block.
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index a3a1e61..2932c78 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -1466,6 +1466,20 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
     waitUntilAllRegionsAssigned(htd.getTableName());
     return (HTable) getConnection().getTable(htd.getTableName());
   }
+
+  /**
+   * Create a table.
+   * @param htd descriptor of the table to create
+   * @return An HTable instance for the created table.
+   * @throws IOException if the table cannot be created
+   */
+  public HTable createTable(HTableDescriptor htd) throws IOException {
+    getHBaseAdmin().createTable(htd);
+    // HBaseAdmin only waits for regions to appear in hbase:meta,
+    // so we should wait until they are assigned
+    waitUntilAllRegionsAssigned(htd.getTableName());
+    return (HTable) getConnection().getTable(htd.getTableName());
+  }
 
   /**
    * Create a table.
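
The arithmetic in the HFileReaderImpl hunk above is easiest to see with concrete numbers. Since HFileV2 the scannable section interleaves inline blocks (leaf index and Bloom blocks) with data blocks, so the gap between two consecutive data block offsets can overstate the earlier block's on-disk size. Here is a toy sketch of the computation; the class name and all sizes are invented for illustration, not taken from the patch:

public class SeekBeforeSizeDemo {
  public static void main(String[] args) {
    // Hypothetical on-disk layout: data block A, an inline leaf index block, data block B.
    long previousBlockOffset = 0L;   // offset of data block A
    long inlineIndexSize = 1024L;    // leaf index block sitting between A and B
    long seekToBlockOffset = 9216L;  // offset of data block B

    // Pre-fix computation: overshoots A's true size by the inline block.
    long naiveSize = seekToBlockOffset - previousBlockOffset;  // 9216 (wrong)

    // Post-fix computation: subtract the inline index block space, as the patch
    // does when the index reader can report it; otherwise the patch passes -1
    // and lets readBlock() consult the block header instead.
    long prevBlockSize = naiveSize - inlineIndexSize;          // 8192 (correct)

    System.out.println("naive=" + naiveSize + " fixed=" + prevBlockSize);
  }
}
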
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
index 3e988d6..7c587da 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
@@ -36,6 +36,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
+import java.util.Random;
 import java.util.UUID;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
@@ -83,6 +84,7 @@ import org.apache.hadoop.hbase.filter.WhileMatchFilter;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
 import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
@@ -5704,6 +5706,100 @@ public class TestFromClientSide {
     assertEquals(insertNum, count);
   }
+
+  /**
+   * Black box test to do some client-side random testing of reverse scan.
+   * Also some incidental testing of different data block encodings.
+   */
+  @Test
+  public void testReverseScanRandomTable() throws Exception {
+    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
+      byte[] fam = Bytes.toBytes("family");
+
+      HTableDescriptor htd = new HTableDescriptor(
+          TableName.valueOf("testReverseScanRandomTable" + encoding.name()));
+      HColumnDescriptor hcd = new HColumnDescriptor(fam);
+      hcd.setDataBlockEncoding(encoding);
+      htd.addFamily(hcd);
+
+      HTable ht = TEST_UTIL.createTable(htd);
+
+      Random rand = new Random(1923);
+
+      int NUM_ROWS = 4000;
+      int NUM_CELLS_PER_ROW = 4;
+
+      byte[][] row_keys = new byte[NUM_ROWS * NUM_CELLS_PER_ROW][];
+      byte[][] values = new byte[NUM_ROWS * NUM_CELLS_PER_ROW][];
+      byte[][] quals = new byte[NUM_ROWS * NUM_CELLS_PER_ROW][];
+
+      for (int i = 0; i < NUM_ROWS; i++) {
+        byte[] row = TestHFileWriterV2.randomOrderedKey(rand, i);
+        // We want to put the cells of a row into the arrays in the opposite order.
+        // Reverse scan only reverses row order, not cell order within a row, so when
+        // the test walks the arrays backwards during the reverse scan, it checks the
+        // cells of each row in forward order, which is correct.
+        for (int j = NUM_CELLS_PER_ROW - 1; j >= 0; j--) {
+          byte[] qual = TestHFileWriterV2.randomOrderedKey(rand, (NUM_CELLS_PER_ROW - 1 - j));
+          byte[] value = TestHFileWriterV2.randomValue(rand);
+
+          int arrayIndex = i * NUM_CELLS_PER_ROW + j;
+          row_keys[arrayIndex] = row;
+          quals[arrayIndex] = qual;
+          values[arrayIndex] = value;
+
+          Put put = new Put(row);
+          put.add(fam, qual, value);
+
+          ht.put(put);
+        }
+      }
+      ht.flushCommits();
+
+      Scan scan = new Scan();
+      scan.setCacheBlocks(false);
+      scan.setReversed(true);
+
+      // Scan every row except the very first one in the table
+      // (stopRow is exclusive).
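+      // The startRow is the last row in the table, so the reverse scan begins at the end.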
+      scan.setStartRow(row_keys[row_keys.length - 1]);
+      scan.setStopRow(row_keys[0]);
+
+      ResultScanner scanner = ht.getScanner(scan);
+
+      int i = NUM_ROWS * NUM_CELLS_PER_ROW - 1;
+      int numCellsScanned = 0;
+
+      while (true) {
+        Result next = scanner.next();
+        if (next == null || next.isEmpty()) {
+          break;
+        }
+
+        List<Cell> cells = next.listCells();
+        for (Cell cell : cells) {
+          assertTrue(reverseScanFailMsg(i, CellUtil.cloneRow(cell), row_keys[i]),
+              CellUtil.matchingRow(cell, row_keys[i]));
+          assertTrue(reverseScanFailMsg(i, CellUtil.cloneQualifier(cell), quals[i]),
+              CellUtil.matchingQualifier(cell, quals[i]));
+          assertTrue(reverseScanFailMsg(i, CellUtil.cloneValue(cell), values[i]),
+              CellUtil.matchingValue(cell, values[i]));
+
+          i--;
+          numCellsScanned++;
+        }
+
+        assertEquals(NUM_CELLS_PER_ROW, next.getFamilyMap(fam).size());
+      }
+
+      // We scan every row in the table except one
+      assertEquals(NUM_CELLS_PER_ROW * NUM_ROWS - NUM_CELLS_PER_ROW, numCellsScanned);
+
+      ht.close();
+    }
+  }
+
+  private String reverseScanFailMsg(int iteration, byte[] a, byte[] b) {
+    return String.format("Failed to match on iteration %s between %s and %s", iteration,
+        Bytes.toStringBinary(a), Bytes.toStringBinary(b));
+  }
 
   @Test
   public void testSuperSimpleWithReverseScan() throws Exception {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBefore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBefore.java
new file mode 100644
index 0000000..b26f594
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBefore.java
@@ -0,0 +1,201 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.BloomFilterFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(MediumTests.class)
+public class TestSeekBefore {
+
+  private static final Log LOG = LogFactory.getLog(TestSeekBefore.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  private static final int NUM_KV = 10000;
+
+  private static final int DATA_BLOCK_SIZE = 4096;
+  private static final int BLOOM_BLOCK_SIZE = 2048;
+  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
+  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
+
+  private static final Random RAND = new Random(192537);
+  private static final byte[] FAM = Bytes.toBytes("family");
+
+  private FileSystem fs;
+  private Configuration conf;
+
+  /**
+   * Scanner.seekBefore() can fail: when seeking to a previous HFile data block, it needs to
+   * know the size of that data block, which it calculates using the current data block offset
+   * and the previous data block offset. This calculation breaks when there are leaf-level
+   * index blocks in the scannable section of the HFile, i.e. starting with HFileV2. This test
+   * tries seekBefore() on a flat (single-level) and multi-level (2, 3) HFile and confirms the
+   * bug is now fixed. The bug also occurs for inline Bloom blocks.
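+   * Inline Bloom blocks, like leaf index blocks, are interleaved with the data blocks in the
+   * scannable section, so they break the same offset-difference size calculation.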
+   */
+  @Test
+  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+
+    // Try out different HFile versions to ensure reverse scan works on each version
+    for (int hfileVersion = 3;
+        hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
+
+      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
+      fs = HFileSystem.get(conf);
+
+      // Try out different Bloom types because inline Bloom blocks break seekBefore()
+      for (BloomType bloomType : BloomType.values()) {
+
+        // Disable caching to prevent it from hiding any bugs in block seeks/reads
+        conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+        CacheConfig cacheConf = new CacheConfig(conf);
+
+        // Test out HFile block indices of various sizes/levels
+        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
+          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
+          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
+
+          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
+              hfileVersion, bloomType, expectedNumLevels));
+
+          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
+          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
+
+          byte[][] keys = new byte[NUM_KV][];
+          byte[][] values = new byte[NUM_KV][];
+
+          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+              "testMultiIndexLevelRandomHFile" + testI);
+
+          // Write the HFile
+          {
+            HFileContext meta = new HFileContextBuilder()
+                .withBlockSize(DATA_BLOCK_SIZE)
+                .build();
+
+            StoreFile.Writer storeFileWriter = new StoreFile.WriterBuilder(conf, cacheConf, fs)
+                .withFilePath(hfilePath)
+                .withFileContext(meta)
+                // TODO: Switch this to "bloomType"; the test then stops passing
+                // because reverse scan breaks on inline Bloom filter blocks
+                .withBloomType(BloomType.NONE)
+                .build();
+
+            for (int i = 0; i < NUM_KV; i++) {
+              byte[] row = TestHFileWriterV2.randomOrderedKey(RAND, i);
+              byte[] qual = TestHFileWriterV2.randomRowOrQualifier(RAND);
+              byte[] value = TestHFileWriterV2.randomValue(RAND);
+              KeyValue kv = new KeyValue(row, FAM, qual, value);
+
+              storeFileWriter.append(kv);
+              keys[i] = kv.getKey();
+              values[i] = value;
+            }
+
+            storeFileWriter.close();
+          }
+
+          // Read the HFile
+          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
+
+          // Sanity check the HFile index level
+          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
+
+          // Check that we can seekBefore in either direction and with both pread
+          // enabled and disabled
+          for (boolean pread : new boolean[] { false, true }) {
+            HFileScanner scanner = reader.getScanner(true, pread);
+            checkNoSeekBefore(keys, scanner, 0);
+            for (int i = 1; i < NUM_KV; i++) {
+              checkSeekBefore(keys, scanner, i);
+              checkKeyValue("i=" + i, keys[i - 1], values[i - 1],
+                  ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
+            }
+            assertTrue(scanner.seekTo());
+            for (int i = NUM_KV - 1; i >= 1; i--) {
+              checkSeekBefore(keys, scanner, i);
+              checkKeyValue("i=" + i, keys[i - 1], values[i - 1],
+                  ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
+            }
+            checkNoSeekBefore(keys, scanner, 0);
+          }
+
+          reader.close();
+        }
+      }
+    }
+  }
+
+  private void checkSeekBefore(byte[][] keys, HFileScanner scanner, int i) throws IOException {
+    assertEquals("Failed to seek to the key before #" + i + " ("
+        + Bytes.toStringBinary(keys[i]) + ")", true,
+        scanner.seekBefore(new KeyValue.KeyOnlyKeyValue(keys[i])));
+  }
+
+  private void checkNoSeekBefore(byte[][] keys, HFileScanner scanner, int i) throws IOException {
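+    // keys[0] is the first key in the file; there is nothing before it,
+    // so seekBefore() must return false here.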
+    assertEquals("Incorrectly succeeded in seeking to before first key ("
+        + Bytes.toStringBinary(keys[i]) + ")", false,
+        scanner.seekBefore(new KeyValue.KeyOnlyKeyValue(keys[i])));
+  }
+
+  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) {
+    assertEquals(msgPrefix + ": expected " + Bytes.toStringBinary(arr)
+        + ", actual " + Bytes.toStringBinary(buf), 0,
+        Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
+  }
+
+  /** Check a key/value pair after it was read by the reader */
+  private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue,
+      ByteBuffer keyRead, ByteBuffer valueRead) {
+    if (!msgPrefix.isEmpty()) {
+      msgPrefix += ". ";
+    }
+
+    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
+    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead);
+  }
+}
-- 
1.7.10.2 (Apple Git-33)