diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 8096178..bc5ec61 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -55,6 +55,7 @@ import sun.misc.Unsafe; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; + import org.apache.hadoop.hbase.util.Bytes.LexicographicalComparerHolder.UnsafeComparer; /** @@ -62,6 +63,7 @@ import org.apache.hadoop.hbase.util.Bytes.LexicographicalComparerHolder.UnsafeCo * comparisons, hash code generation, manufacturing keys for HashMaps or * HashSets, and can be used as key in maps or trees. */ +@SuppressWarnings("restriction") @InterfaceAudience.Public @InterfaceStability.Stable @edu.umd.cs.findbugs.annotations.SuppressWarnings( @@ -121,6 +123,11 @@ public class Bytes implements Comparable { */ public static final int SIZEOF_SHORT = Short.SIZE / Byte.SIZE; + /** + * Mask to apply to a long to reveal the lower int only. Use like this: + * int i = (int)(0xFFFFFFFF00000000l ^ some_long_value); + */ + public static final long MASK_FOR_LOWER_INT_IN_LONG = 0xFFFFFFFF00000000l; /** * Estimate of size cost to pay beyond payload in jvm for instance of byte []. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 1e84e6a..1edb4cb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -117,7 +117,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { private HFileContext hfileContext; /** Filesystem-level block reader. */ - protected HFileBlock.FSReader fsBlockReader; + private HFileBlock.FSReader fsBlockReader; /** * A "sparse lock" implementation allowing to lock on a particular block @@ -212,8 +212,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { fileInfo = new FileInfo(); fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream()); byte[] creationTimeBytes = fileInfo.get(FileInfo.CREATE_TIME_TS); - this.hfileContext.setFileCreateTime(creationTimeBytes == null ? - 0 : Bytes.toLong(creationTimeBytes)); + this.hfileContext.setFileCreateTime(creationTimeBytes == null? 0: + Bytes.toLong(creationTimeBytes)); lastKey = fileInfo.get(FileInfo.LASTKEY); avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN)); avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN)); @@ -500,29 +500,79 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } protected void readKeyValueLen() { - blockBuffer.mark(); - currKeyLen = blockBuffer.getInt(); - currValueLen = blockBuffer.getInt(); - if (currKeyLen < 0 || currValueLen < 0 || currKeyLen > blockBuffer.limit() - || currValueLen > blockBuffer.limit()) { - throw new IllegalStateException("Invalid currKeyLen " + currKeyLen + " or currValueLen " - + currValueLen + ". Block offset: " - + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " - + blockBuffer.position() + " (without header)."); + // This is a hot method. We go out of our way to make this method short so it can be + // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves + // because it is faster than going via range-checked ByteBuffer methods or going through a + // byte buffer array a byte at a time. + int p = blockBuffer.position() + blockBuffer.arrayOffset(); + // Get a long at a time rather than read two individual ints. In micro-benchmarking, even + // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints. + long ll = Bytes.toLong(blockBuffer.array(), p); + // Read top half as an int of key length and bottom int as value length + this.currKeyLen = (int)(ll >> Integer.SIZE); + this.currValueLen = (int)(Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); + checkKeyValueLen(); + // Move position past the key and value lengths and then beyond the key and value + p += (Bytes.SIZEOF_LONG + currKeyLen + currValueLen); + if (reader.getFileContext().isIncludesTags()) { + // Tags length is a short. + this.currTagsLen = Bytes.toShort(blockBuffer.array(), p); + checkTagsLen(); + p += (Bytes.SIZEOF_SHORT + currTagsLen); } - ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen); - if (this.reader.getFileContext().isIncludesTags()) { - // Read short as unsigned, high byte first - currTagsLen = ((blockBuffer.get() & 0xff) << 8) ^ (blockBuffer.get() & 0xff); - if (currTagsLen < 0 || currTagsLen > blockBuffer.limit()) { - throw new IllegalStateException("Invalid currTagsLen " + currTagsLen + ". Block offset: " - + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " - + blockBuffer.position() + " (without header)."); + readMvccVersion(p); + } + + private final void checkTagsLen() { + if (checkLen(this.currTagsLen)) { + throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen + + ". Block offset: " + block.getOffset() + ", block length: " + this.blockBuffer.limit() + + ", position: " + this.blockBuffer.position() + " (without header)."); + } + } + + /** + * Read mvcc. Does checks to see if we even need to read the mvcc at all. + * @param position + */ + protected void readMvccVersion(final int position) { + // See if we even need to decode mvcc. + if (!this.reader.shouldIncludeMemstoreTS()) return; + if (!this.reader.isDecodeMemstoreTS()) { + currMemstoreTS = 0; + currMemstoreTSLen = 1; + return; + } + _readMvccVersion(position); + } + + /** + * Actually do the mvcc read. Does no checks. + * @param position + */ + private void _readMvccVersion(final int position) { + // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e. + // previous if one-byte vint, we'd redo the vint call to find int size. + // Also the method is kept small so can be inlined. + byte firstByte = blockBuffer.array()[position]; + int len = WritableUtils.decodeVIntSize(firstByte); + if (len == 1) { + this.currMemstoreTS = firstByte; + } else { + long i = 0; + for (int idx = 0; idx < len - 1; idx++) { + byte b = blockBuffer.array()[position + 1 + idx]; + i = i << 8; + i = i | (b & 0xFF); } - ByteBufferUtils.skip(blockBuffer, currTagsLen); + currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); } - readMvccVersion(); - blockBuffer.reset(); + this.currMemstoreTSLen = len; + } + + protected void readMvccVersion() { + // TODO CLEANUP!!! + readMvccVersion(blockBuffer.arrayOffset() + blockBuffer.position()); } /** @@ -579,7 +629,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } } blockBuffer.reset(); - int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2); + int keyOffset = + blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2); keyOnlyKv.setKey(blockBuffer.array(), keyOffset, klen); int comp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlyKv); @@ -814,16 +865,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } /** - * Go to the next key/value in the block section. Loads the next block if - * necessary. If successful, {@link #getKey()} and {@link #getValue()} can - * be called. - * - * @return true if successfully navigated to the next key/value + * Set the position on current backing blockBuffer. */ - @Override - public boolean next() throws IOException { - assertSeeked(); - + private void positionThisBlockBuffer() { try { blockBuffer.position(getNextCellStartPosition()); } catch (IllegalArgumentException e) { @@ -834,25 +878,39 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { + "; currBlock currBlockOffset = " + block.getOffset()); throw e; } + } - if (blockBuffer.remaining() <= 0) { - long lastDataBlockOffset = - reader.getTrailer().getLastDataBlockOffset(); + /** + * Set our selves up for the next 'next' invocation, set up next block. + * @return True is more to read else false if at the end. + * @throws IOException + */ + private boolean positionForNextBlock() throws IOException { + // Methods are small so they get inlined because they are 'hot'. + long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); + if (block.getOffset() >= lastDataBlockOffset) { + setNonSeekedState(); + return false; + } + return isNextBlock(); + } - if (block.getOffset() >= lastDataBlockOffset) { - setNonSeekedState(); - return false; - } - // read the next block - HFileBlock nextBlock = readNextDataBlock(); - if (nextBlock == null) { - setNonSeekedState(); - return false; - } + private boolean isNextBlock() throws IOException { + // Methods are small so they get inlined because they are 'hot'. + HFileBlock nextBlock = readNextDataBlock(); + if (nextBlock == null) { + setNonSeekedState(); + return false; + } + updateCurrBlock(nextBlock); + return true; + } - updateCurrBlock(nextBlock); - return true; + private final boolean _next() throws IOException { + // Small method so can be inlined. It is a hot one. + if (blockBuffer.remaining() <= 0) { + return positionForNextBlock(); } // We are still in the same block. @@ -861,6 +919,22 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } /** + * Go to the next key/value in the block section. Loads the next block if + * necessary. If successful, {@link #getKey()} and {@link #getValue()} can + * be called. + * + * @return true if successfully navigated to the next key/value + */ + @Override + public boolean next() throws IOException { + // This is a hot method so extreme measures taken to ensure it is small and inlineable. + // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation + assertSeeked(); + positionThisBlockBuffer(); + return _next(); + } + + /** * Positions this scanner at the start of the file. * * @return false if empty file; i.e. a call to next would return false and @@ -909,6 +983,26 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } /** + * @param v + * @return True if v < 0 or v > current block buffer limit. + */ + protected final boolean checkLen(final int v) { + return v < 0 || v > this.blockBuffer.limit(); + } + + /** + * Check key and value lengths are wholesome. + */ + protected final void checkKeyValueLen() { + if (checkLen(this.currKeyLen) || checkLen(this.currValueLen)) { + throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen + + " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() + + ", block length: " + this.blockBuffer.limit() + ", position: " + + this.blockBuffer.position() + " (without header)."); + } + } + + /** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. * @@ -934,19 +1028,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { this.nextIndexedKey = null; } - protected void readMvccVersion() { - if (this.reader.shouldIncludeMemstoreTS()) { - if (this.reader.isDecodeMemstoreTS()) { - currMemstoreTS = Bytes.readAsVLong(blockBuffer.array(), blockBuffer.arrayOffset() - + blockBuffer.position()); - currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS); - } else { - currMemstoreTS = 0; - currMemstoreTSLen = 1; - } - } - } - protected ByteBuffer getFirstKeyInBlock(HFileBlock curBlock) { ByteBuffer buffer = curBlock.getBufferWithoutHeader(); // It is safe to manipulate this buffer because we own the buffer object. @@ -1014,7 +1095,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; - protected boolean includesMemstoreTS = false; + private boolean includesMemstoreTS = false; protected boolean decodeMemstoreTS = false; @@ -1321,7 +1402,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { private final HFileBlockDecodingContext decodingCtx; private final DataBlockEncoder.EncodedSeeker seeker; private final DataBlockEncoder dataBlockEncoder; - protected final HFileContext meta; + private final HFileContext meta; public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread, boolean isCompaction, HFileContext meta) { @@ -1548,7 +1629,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { protected HFileContext createHFileContext(FSDataInputStreamWrapper fsdis, long fileSize, HFileSystem hfs, Path path, FixedFileTrailer trailer) throws IOException { HFileContextBuilder builder = new HFileContextBuilder() - .withIncludesMvcc(this.includesMemstoreTS) + .withIncludesMvcc(shouldIncludeMemstoreTS()) .withHBaseCheckSum(true) .withCompression(this.compressAlgo); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 042deed..e29af5c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1758,8 +1758,8 @@ public class HStore implements Store { * @return true if the cell is expired */ static boolean isCellTTLExpired(final Cell cell, final long oldestTimestamp, final long now) { - // Do not create an Iterator or Tag objects unless the cell actually has - // tags + // Do not create an Iterator or Tag objects unless the cell actually has tags. + // TODO: This check for tags is really expensive. We decode an int for key and value. Costs. if (cell.getTagsLength() > 0) { // Look for a TTL tag first. Use it instead of the family setting if // found. If a cell has multiple TTLs, resolve the conflict by using the diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 15bf2cb..d9809cf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2226,7 +2226,10 @@ public class RSRpcServices implements HBaseRPCErrorHandler, if (maxResultSize <= 0) { maxResultSize = maxQuotaResultSize; } - List values = new ArrayList(); + // This is cells inside a row. Default size is 10 so if many versions or many cfs, + // then we'll resize. Resizings show in profiler. Set it higher than 10. For now + // arbitrary 32. TODO: keep record of general size of results being returned. + List values = new ArrayList(32); region.startRegionOperation(Operation.SCAN); try { int i = 0; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java index 14c6ca9..9c99a43 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java @@ -279,7 +279,7 @@ public class TestTags { Put put = new Put(row); byte[] value = Bytes.toBytes("value"); put.add(fam, qual, HConstants.LATEST_TIMESTAMP, value); - int bigTagLen = Short.MAX_VALUE + 5; + int bigTagLen = Short.MAX_VALUE - 5; put.setAttribute("visibility", new byte[bigTagLen]); table.put(put); Put put1 = new Put(row1);