Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java	(revision 0)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java	(revision 0)
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+/**
+ * Test a case when an inline index chunk is converted to a root one. This reproduces the bug in
+ * HBASE-6871. We write a carefully selected number of relatively large keys so that we accumulate
+ * a leaf index chunk that only goes over the configured index chunk size after adding the last
+ * key/value. The bug is that when we close the file, we convert that inline (leaf-level) chunk
+ * into a root chunk, but then look at the size of that root chunk, find that it is greater than
+ * the configured chunk size, and split it into a number of intermediate index blocks that should
+ * really be leaf-level blocks. If more keys were added, we would flush the leaf-level block, add
+ * another entry to the root-level block, and that would prevent us from upgrading the leaf-level
+ * chunk to the root chunk, thus not triggering the bug.
+ */
+public class TestHFileInlineToRootChunkConversion {
+
+  private final HBaseTestingUtility testUtil = new HBaseTestingUtility();
+  private final Configuration conf = testUtil.getConfiguration();
+
+  @Test
+  public void testWriteHFile() throws Exception {
+    Path hfPath = new Path(testUtil.getDataTestDir(),
+        TestHFileInlineToRootChunkConversion.class.getSimpleName() + ".hfile");
+    int maxChunkSize = 1024;
+    FileSystem fs = FileSystem.get(conf);
+    CacheConfig cacheConf = new CacheConfig(conf);
+    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
+    HFileWriterV2 hfw =
+        (HFileWriterV2) new HFileWriterV2.WriterFactoryV2(conf, cacheConf)
+            .withBlockSize(16)
+            .withPath(fs, hfPath).create();
+    List<byte[]> keys = new ArrayList<byte[]>();
+    StringBuilder sb = new StringBuilder();
+
+    for (int i = 0; i < 4; ++i) {
+      sb.append("key" + String.format("%05d", i));
+      sb.append("_");
+      for (int j = 0; j < 100; ++j) {
+        sb.append('0' + j);
+      }
+      String keyStr = sb.toString();
+      sb.setLength(0);
+
+      byte[] k = Bytes.toBytes(keyStr);
+      System.out.println("Key: " + Bytes.toString(k));
+      keys.add(k);
+      byte[] v = Bytes.toBytes("value" + i);
+      hfw.append(k, v);
+    }
+    hfw.close();
+
+    HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs, hfPath, cacheConf);
+    HFileScanner scanner = reader.getScanner(true, true);
+    for (int i = 0; i < keys.size(); ++i) {
+      scanner.seekTo(keys.get(i));
+    }
+    reader.close();
+  }
+
+}
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java	(revision 1388010)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java	(working copy)
@@ -40,6 +40,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValue.KeyComparator;
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
@@ -49,6 +50,7 @@
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Writable;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 /**
@@ -241,6 +243,13 @@
   public static abstract class WriterFactory {
     protected Configuration conf;
     protected CacheConfig cacheConf;
+    protected FileSystem fs;
+    protected Path path;
+    protected FSDataOutputStream ostream;
+    protected int blockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
+    protected Compression.Algorithm compression =
+        HFile.DEFAULT_COMPRESSION_ALGORITHM;
+    protected KeyComparator comparator;
 
     WriterFactory(Configuration conf, CacheConfig cacheConf) {
       this.conf = conf;
@@ -265,6 +274,27 @@
     public abstract Writer createWriter(final FSDataOutputStream ostream,
         final int blockSize, final Compression.Algorithm compress,
         final KeyComparator c) throws IOException;
+
+    public WriterFactory withBlockSize(int blockSize) {
+      this.blockSize = blockSize;
+      return this;
+    }
+    public WriterFactory withPath(FileSystem fs, Path path) {
+      Preconditions.checkNotNull(fs);
+      Preconditions.checkNotNull(path);
+      this.fs = fs;
+      this.path = path;
+      return this;
+    }
+
+    public Writer create() throws IOException {
+      if ((path != null ? 1 : 0) + (ostream != null ? 1 : 0) != 1) {
+        throw new AssertionError("Please specify exactly one of " +
+            "filesystem/path or path");
+      }
+      return createWriter(fs, path, blockSize,
+          compression, comparator);
+    }
   }
 
   /** The configuration key for HFile version to use for new files */
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java	(revision 1388010)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java	(working copy)
@@ -718,7 +718,7 @@
      * @throws IOException
      */
     public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
-      if (curInlineChunk.getNumEntries() != 0) {
+      if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
         throw new IOException("Trying to write a multi-level block index, " +
             "but are " + curInlineChunk.getNumEntries() + " entries in the " +
             "last inline chunk.");
@@ -729,9 +729,11 @@
       byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata()
           : null;
 
-      while (rootChunk.getRootSize() > maxChunkSize) {
-        rootChunk = writeIntermediateLevel(out, rootChunk);
-        numLevels += 1;
+      if (curInlineChunk != null) {
+        while (rootChunk.getRootSize() > maxChunkSize) {
+          rootChunk = writeIntermediateLevel(out, rootChunk);
+          numLevels += 1;
+        }
       }
 
       // write the root level
@@ -890,11 +892,18 @@
      */
    @Override
    public boolean shouldWriteBlock(boolean closing) {
-      if (singleLevelOnly)
+      if (singleLevelOnly) {
        throw new UnsupportedOperationException(INLINE_BLOCKS_NOT_ALLOWED);
+      }
 
-      if (curInlineChunk.getNumEntries() == 0)
+      if (curInlineChunk == null) {
+        throw new IllegalStateException("curInlineChunk is null; has shouldWriteBlock been " +
+            "called with closing=true and then called again?");
+      }
+
+      if (curInlineChunk.getNumEntries() == 0) {
        return false;
+      }
 
      // We do have some entries in the current inline chunk.
      if (closing) {
@@ -904,7 +913,7 @@
          expectNumLevels(1);
 
          rootChunk = curInlineChunk;
-          curInlineChunk = new BlockIndexChunk();
+          curInlineChunk = null;  // Disallow adding any more index entries.
          return false;
        }
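
For reference, a minimal usage sketch of the fluent WriterFactory API that the HFile.java hunk above introduces (withBlockSize/withPath/create). This sketch is not part of the patch: the class name, output path, block size, and example keys are illustrative assumptions, and it is declared in the org.apache.hadoop.hbase.io.hfile package because, as in the test above, the WriterFactoryV2 constructor is assumed to be package-private.

package org.apache.hadoop.hbase.io.hfile;  // same package as the factory (assumed package-private constructor)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical example class, not part of the patch.
public class WriterFactoryUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    CacheConfig cacheConf = new CacheConfig(conf);
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/example.hfile");  // illustrative output path

    // Per the patch, create() requires that exactly one of path or ostream has been set;
    // blockSize defaults to HColumnDescriptor.DEFAULT_BLOCKSIZE and compression to
    // HFile.DEFAULT_COMPRESSION_ALGORITHM when not configured on the factory.
    HFile.Writer writer = new HFileWriterV2.WriterFactoryV2(conf, cacheConf)
        .withBlockSize(4096)
        .withPath(fs, path)
        .create();
    try {
      // HFile requires keys to be appended in sorted order.
      writer.append(Bytes.toBytes("row1"), Bytes.toBytes("value1"));
      writer.append(Bytes.toBytes("row2"), Bytes.toBytes("value2"));
    } finally {
      writer.close();
    }
  }
}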