From 6cb1d9487ee2d736f62bc02de88ecd6c67136a9d Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Tue, 22 May 2018 13:16:11 -0700 Subject: [PATCH] HBASE-20620 HBASE-20564 Tighter ByteBufferKeyValue Cell Comparator; part 2 Adds new stripped-down, faster ByteBufferKeyValue comparator (BBKV is the base Cell-type in hbase2). Creates an instance of new Comparator each time we create new memstore rather than use the universal CellComparator. Remove unused and unneeded Interfaces from Cell base type. --- .../org/apache/hadoop/hbase/BBKVComparator.java | 170 +++++++++++++++++ .../org/apache/hadoop/hbase/CellComparator.java | 8 + .../apache/hadoop/hbase/CellComparatorImpl.java | 210 ++++++++++++--------- .../java/org/apache/hadoop/hbase/ExtendedCell.java | 19 +- .../apache/hadoop/hbase/util/ByteBufferUtils.java | 14 +- .../java/org/apache/hadoop/hbase/util/Bytes.java | 2 +- .../org/apache/hadoop/hbase/util/UnsafeAccess.java | 32 ++-- .../hadoop/hbase/TestByteBufferKeyValue.java | 18 +- .../org/apache/hadoop/hbase/io/hfile/HFile.java | 4 +- .../apache/hadoop/hbase/regionserver/CellSet.java | 2 +- .../hadoop/hbase/regionserver/ChunkCreator.java | 2 +- .../apache/hadoop/hbase/regionserver/HRegion.java | 25 +-- .../apache/hadoop/hbase/regionserver/HStore.java | 4 +- .../hadoop/hbase/regionserver/MemStoreLABImpl.java | 15 +- .../org/apache/hadoop/hbase/wal/WALFactory.java | 6 +- 15 files changed, 386 insertions(+), 145 deletions(-) create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java new file mode 100644 index 0000000000..335b546ec8 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Comparator; + +/** + * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV + * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the + * expected ByteBufferKeyValue. + * + *

This is a tricked-out Comparator at the heart of hbase read and write. It is in
+ * the HOT path so we try all sorts of ugly stuff so we can go faster. See below
+ * in this javadoc comment for the list.
+ *
+ *

Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues
+ * as much as possible so the JIT can settle (e.g. make one per ConcurrentSkipListMap
+ * in HStore).
+ *
+ *
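+ * <p>For example, the CellSet hunk later in this patch wires one in per memstore skip-list via
+ * {@link CellComparator#getSimpleComparator()}; a sketch only, with illustrative local names:
+ * <pre>
+ *   // One comparator instance per skip-list keeps the compare() call site
+ *   // effectively monomorphic so the JIT can settle and inline.
+ *   Comparator simple = cellComparator.getSimpleComparator(); // BBKVComparator in the common case
+ *   NavigableMap<Cell, Cell> map = new ConcurrentSkipListMap<>(simple); // unchecked: raw Comparator
+ * </pre>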

Exploits specially added methods in BBKV to save on deserializations of shorts,
+ * longs, etc.; e.g. calculating the family length requires the row length, so pass it in
+ * rather than recalculate it, and so on.
+ *
+ *
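+ * <p>A sketch of the reuse pattern (these are the BBKV accessors the compare below leans on):
+ * <pre>
+ *   int rowLength = kv.getRowLength();                     // deserialize the short once
+ *   int famLenPos = kv.getFamilyLengthPosition(rowLength); // derived; no re-read of row length
+ *   int famLen = kv.getFamilyLength(famLenPos);            // single byte read
+ *   int keyLen = kv.getKeyLength();
+ *   int qualLen = kv.getQualifierLength(keyLen, rowLength, famLen);
+ * </pre>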

This comparator does static dispatch to private final methods so hotspot is comfortable
+ * deciding to inline.
+ *
+ *

Measurement has it that we almost have it all inlining, from the memstore
+ * ConcurrentSkipListMap on down to the (unsafe) intrinsics that do the byte compares
+ * and deserialize the shorts and ints; needs a bit more work.
+ *
+ *

Does not take a Type to compare: i.e. it is not a Comparator<Cell> or
+ * CellComparator<Cell> or Comparator<ByteBufferKeyValue> because that adds
+ * another method to the hierarchy -- from compare(Object, Object)
+ * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if
+ * the hierarchy is too deep (as is the case here).
+ *
+ *
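+ * <p>The depth at issue, sketched (the generic variant is shown only for contrast):
+ * <pre>
+ *   // 'implements Comparator<Cell>' would make javac emit a synthetic bridge:
+ *   //   compare(Object, Object) -> compare(Cell, Cell) -> static compare(BBKV, BBKV)
+ *   // Implementing raw Comparator keeps the chain a hop shorter for the inliner:
+ *   //   compare(Object, Object) -> static compare(BBKV, BBKV)
+ * </pre>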

Be careful making changes. Compare perf before and after and look at what + * hotspot ends up generating before committing change (jitwatch is helpful here). + * Changing this one class doubled write throughput (HBASE-20483). + */ +@InterfaceAudience.Private +public class BBKVComparator implements Comparator { + protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class); + private final Comparator fallback; + + public BBKVComparator(Comparator fallback) { + this.fallback = fallback; + } + + @Override + public int compare(Object l, Object r) { + // LOG.info("ltype={} rtype={}", l, r); + if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) { + return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r); + } + // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell) + return this.fallback.compare((Cell)l, (Cell)r); + } + + private static final int compare(ByteBufferKeyValue left, ByteBufferKeyValue right) { + // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not + // sharing gets us a few percent more throughput in compares. If changes here or there, make + // sure done in both places. + + // Compare Rows. Cache row length. + int leftRowLength = left.getRowLength(); + int rightRowLength = right.getRowLength(); + int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(), + leftRowLength, + right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); + if (diff != 0) { + return diff; + } + + // If the column is not specified, the "minimum" key type appears as latest in the sorted + // order, regardless of the timestamp. This is used for specifying the last key/value in a + // given row, because there is no "lexicographically last column" (it would be infinitely long). + // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that + // we can't do memcmp w/ special rules like this. + // TODO: Is there a test for this behavior? + int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); + int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); + int leftKeyLength = left.getKeyLength(); + int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, + leftFamilyLength); + + // No need of left row length below here. + + byte leftType = left.getTypeByte(leftKeyLength); + if (leftFamilyLength + leftQualifierLength == 0 && + leftType == KeyValue.Type.Minimum.getCode()) { + // left is "bigger", i.e. it appears later in the sorted order + return 1; + } + + int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); + int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); + int rightKeyLength = right.getKeyLength(); + int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, + rightFamilyLength); + + // No need of right row length below here. + + byte rightType = right.getTypeByte(rightKeyLength); + if (rightFamilyLength + rightQualifierLength == 0 && + rightType == KeyValue.Type.Minimum.getCode()) { + return -1; + } + + // Compare families. 
+ int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); + int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); + diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition, + leftFamilyLength, + right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); + if (diff != 0) { + return diff; + } + + // Compare qualifiers + diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), + left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, + right.getQualifierByteBuffer(), + right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), + rightQualifierLength); + if (diff != 0) { + return diff; + } + + // Timestamps. + // Swap order we pass into compare so we get DESCENDING order. + diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength)); + if (diff != 0) { + return diff; + } + + // Compare types. Let the delete types sort ahead of puts; i.e. types + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. + diff = (0xff & rightType) - (0xff & leftType); + if (diff != 0) { + return diff; + } + + // Negate following comparisons so later edits show up first mvccVersion: later sorts first + return Longs.compare(right.getSequenceId(), left.getSequenceId()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java index 60be67082b..301edcbc3c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java @@ -21,6 +21,7 @@ import java.util.Comparator; import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; + /** * Comparator for comparing cells and has some specialized methods that allows comparing individual * cell components like row, family, qualifier and timestamp @@ -130,4 +131,11 @@ public interface CellComparator extends Comparator { * timestamp 0 if both timestamps are equal */ int compareTimestamps(long leftCellts, long rightCellts); + + /** + * @return A dumbed-down, fast comparator for hbase2 base-type, the {@link ByteBufferKeyValue}. + * Create an instance when you make a new memstore, when you know only BBKVs will be passed. + * Do not pollute with types other than BBKV if can be helped; the Comparator will slow. + */ + Comparator getSimpleComparator(); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java index 785d8ffb1c..9a0543b3cb 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java @@ -28,15 +28,21 @@ import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; +import java.util.Comparator; + /** * Compare two HBase cells. Do not use this method comparing -ROOT- or * hbase:meta cells. Cells from these tables need a specialized comparator, one that * takes account of the special formatting of the row where we have commas to delimit table from * regionname, from row. See KeyValue for how it has a special comparator to do hbase:meta cells * and yet another for -ROOT-. 
- * While using this comparator for {{@link #compareRows(Cell, Cell)} et al, the hbase:meta cells + *

While using this comparator for {@link #compareRows(Cell, Cell)} et al, the hbase:meta cells
+ * format should be taken into consideration, for which an instance of this comparator
+ * should be used. In all other cases the static APIs in this comparator are enough.
+ *
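+ * <p>A sketch of the intended split (COMPARATOR and META_COMPARATOR are defined below; the
+ * tableName check is illustrative, not code from this patch):
+ * <pre>
+ *   // hbase:meta row keys embed commas (table,startrow,timestamp) so they need
+ *   // the meta-aware instance; all other tables take the plain comparator.
+ *   CellComparator cc = TableName.META_TABLE_NAME.equals(tableName)
+ *       ? CellComparatorImpl.META_COMPARATOR : CellComparatorImpl.COMPARATOR;
+ * </pre>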

HOT methods. We spend a good portion of CPU comparing. Anything that makes the compare
+ * faster will likely manifest at the macro level. See also
+ * {@link BBKVComparator}. Use it when the Cells to compare are mostly {@link ByteBufferKeyValue}s.
+ *
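+ * <p>One ordering subtlety worth a worked example: timestamps sort DESCENDING, newest first,
+ * which is why the operands are swapped when comparing them, roughly:
+ * <pre>
+ *   // ts=5 vs ts=3: Long.compare(3, 5) < 0, so the newer (ts=5) cell sorts first.
+ *   diff = Long.compare(rightCell.getTimestamp(), leftCell.getTimestamp());
+ * </pre>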

*/ @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="UNKNOWN", @@ -57,21 +63,17 @@ public class CellComparatorImpl implements CellComparator { public static final CellComparatorImpl META_COMPARATOR = new MetaCellComparator(); @Override - public int compare(Cell a, Cell b) { + public final int compare(final Cell a, final Cell b) { return compare(a, b, false); } - /** - * Compare cells. - * @param ignoreSequenceid True if we are to compare the key portion only and ignore - * the sequenceid. Set to false to compare key and consider sequenceid. - * @return 0 if equal, -1 if a < b, and +1 if a > b. - */ @Override public int compare(final Cell a, final Cell b, boolean ignoreSequenceid) { + int diff = 0; + // "Peel off" the most common path. if (a instanceof ByteBufferKeyValue && b instanceof ByteBufferKeyValue) { - diff = compareByteBufferKeyValue((ByteBufferKeyValue)a, (ByteBufferKeyValue)b); + diff = compare((ByteBufferKeyValue)a, (ByteBufferKeyValue)b); if (diff != 0) { return diff; } @@ -88,82 +90,7 @@ public class CellComparatorImpl implements CellComparator { } // Negate following comparisons so later edits show up first mvccVersion: later sorts first - return ignoreSequenceid? diff: Longs.compare(b.getSequenceId(), a.getSequenceId()); - } - - /** - * Specialized comparator for the ByteBufferKeyValue type exclusivesly. - * Caches deserialized lengths of rows and families, etc., and reuses them where it can - * (ByteBufferKeyValue has been changed to be amenable to our providing pre-made lengths, etc.) - */ - private static final int compareByteBufferKeyValue(ByteBufferKeyValue left, - ByteBufferKeyValue right) { - // Compare Rows. Cache row length. - int leftRowLength = left.getRowLength(); - int rightRowLength = right.getRowLength(); - int diff = ByteBufferUtils.compareTo( - left.getRowByteBuffer(), left.getRowPosition(), leftRowLength, - right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); - if (diff != 0) { - return diff; - } - - // If the column is not specified, the "minimum" key type appears the - // latest in the sorted order, regardless of the timestamp. This is used - // for specifying the last key/value in a given row, because there is no - // "lexicographically last column" (it would be infinitely long). The - // "maximum" key type does not need this behavior. - // Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this. - // I tried to get rid of the above but scanners depend on it. TODO. - int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); - int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); - int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); - int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); - int leftKeyLength = left.getKeyLength(); - int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, - leftFamilyLength); - byte leftType = left.getTypeByte(leftKeyLength); - if (leftFamilyLength + leftQualifierLength == 0 && leftType == Type.Minimum.getCode()) { - // left is "bigger", i.e. it appears later in the sorted order - return 1; - } - int rightKeyLength = right.getKeyLength(); - int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, - rightFamilyLength); - byte rightType = right.getTypeByte(rightKeyLength); - if (rightFamilyLength + rightQualifierLength == 0 && rightType == Type.Minimum.getCode()) { - return -1; - } - - // Compare families. 
- int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); - int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); - diff = ByteBufferUtils.compareTo( - left.getFamilyByteBuffer(), leftFamilyPosition, leftFamilyLength, - right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); - if (diff != 0) { - return diff; - } - // Compare qualifiers - diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), - left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, - right.getQualifierByteBuffer(), - right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), - rightQualifierLength); - if (diff != 0) { - return diff; - } - // Timestamps. - diff = compareTimestampsInternal(left.getTimestamp(leftKeyLength), - right.getTimestamp(rightKeyLength)); - if (diff != 0) { - return diff; - } - // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers. Maximum (255) - // appears ahead of everything, and minimum (0) appears after - // everything. - return (0xff & rightType) - (0xff & leftType); + return ignoreSequenceid? diff: Long.compare(b.getSequenceId(), a.getSequenceId()); } /** @@ -254,7 +181,7 @@ public class CellComparatorImpl implements CellComparator { return compareRows(left, left.getRowLength(), right, right.getRowLength()); } - int compareRows(final Cell left, int leftRowLength, final Cell right, int rightRowLength) { + static int compareRows(final Cell left, int leftRowLength, final Cell right, int rightRowLength) { // left and right can be exactly the same at the beginning of a row if (left == right) { return 0; @@ -341,7 +268,7 @@ public class CellComparatorImpl implements CellComparator { return diff; } - diff = compareTimestamps(left, right); + diff = compareTimestamps(left.getTimestamp(), right.getTimestamp()); if (diff != 0) { return diff; } @@ -355,16 +282,12 @@ public class CellComparatorImpl implements CellComparator { @Override public int compareTimestamps(final Cell left, final Cell right) { - return compareTimestampsInternal(left.getTimestamp(), right.getTimestamp()); + return compareTimestamps(left.getTimestamp(), right.getTimestamp()); } @Override public int compareTimestamps(final long ltimestamp, final long rtimestamp) { - return compareTimestampsInternal(ltimestamp, rtimestamp); - } - - private static final int compareTimestampsInternal(final long ltimestamp, final long rtimestamp) { - // Swap the times so sort is newest to oldest, descending. + // Swap order we pass into compare so we get DESCENDING order. return Long.compare(rtimestamp, ltimestamp); } @@ -373,6 +296,7 @@ public class CellComparatorImpl implements CellComparator { * {@link KeyValue}s. */ public static class MetaCellComparator extends CellComparatorImpl { + // TODO: Do we need a ByteBufferKeyValue version of this? @Override public int compareRows(final Cell left, final Cell right) { return compareRows(left.getRowArray(), left.getRowOffset(), left.getRowLength(), @@ -451,5 +375,105 @@ public class CellComparatorImpl implements CellComparator { right, rightFarDelimiter, rlength - (rightFarDelimiter - roffset)); return result; } + + @Override + public Comparator getSimpleComparator() { + return this; + } + } + + private static final int compare(ByteBufferKeyValue left, ByteBufferKeyValue right) { + // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not + // sharing gets us a few percent more throughput in compares. 
If changes here or there, make + // sure done in both places. + + // Compare Rows. Cache row length. + int leftRowLength = left.getRowLength(); + int rightRowLength = right.getRowLength(); + int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(), + leftRowLength, + right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); + if (diff != 0) { + return diff; + } + + // If the column is not specified, the "minimum" key type appears as latest in the sorted + // order, regardless of the timestamp. This is used for specifying the last key/value in a + // given row, because there is no "lexicographically last column" (it would be infinitely long). + // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that + // we can't do memcmp w/ special rules like this. + // TODO: Is there a test for this behavior? + int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); + int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); + int leftKeyLength = left.getKeyLength(); + int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, + leftFamilyLength); + + // No need of left row length below here. + + byte leftType = left.getTypeByte(leftKeyLength); + if (leftFamilyLength + leftQualifierLength == 0 && + leftType == KeyValue.Type.Minimum.getCode()) { + // left is "bigger", i.e. it appears later in the sorted order + return 1; + } + + int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); + int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); + int rightKeyLength = right.getKeyLength(); + int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, + rightFamilyLength); + + // No need of right row length below here. + + byte rightType = right.getTypeByte(rightKeyLength); + if (rightFamilyLength + rightQualifierLength == 0 && + rightType == KeyValue.Type.Minimum.getCode()) { + return -1; + } + + // Compare families. + int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); + int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); + diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition, + leftFamilyLength, + right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); + if (diff != 0) { + return diff; + } + + // Compare qualifiers + diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), + left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, + right.getQualifierByteBuffer(), + right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), + rightQualifierLength); + if (diff != 0) { + return diff; + } + + // Timestamps. + // Swap order we pass into compare so we get DESCENDING order. + diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength)); + if (diff != 0) { + return diff; + } + + // Compare types. Let the delete types sort ahead of puts; i.e. types + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. 
+ diff = (0xff & rightType) - (0xff & leftType); + if (diff != 0) { + return diff; + } + + // Negate following comparisons so later edits show up first mvccVersion: later sorts first + return Longs.compare(right.getSequenceId(), left.getSequenceId()); + } + + @Override + public Comparator getSimpleComparator() { + return new BBKVComparator(this); } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java index 07b0e3f798..f71b7becd2 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java @@ -30,9 +30,16 @@ import org.apache.yetus.audience.InterfaceAudience; * must implement this. */ @InterfaceAudience.Private -public interface ExtendedCell extends RawCell, HeapSize, Cloneable { +public interface ExtendedCell extends RawCell { + /** + * @return Approximate 'exclusive deep size' of implementing object. Includes + * count of payload and hosting object sizings. + */ + // From HeapSize Interface but trying to shrink how many Interfaces a Cell implements. + long heapSize(); int CELL_NOT_BASED_ON_CHUNK = -1; + /** * Write this cell to an OutputStream in a {@link KeyValue} format. *
KeyValue format
@@ -87,6 +94,13 @@ public interface ExtendedCell extends RawCell, HeapSize, Cloneable { getValueLength(), getTagsLength(), withTags); } + /** + * @return Serialized size (defaults to include tag length). + */ + default int getSerializedSize() { + return getSerializedSize(true); + } + /** * Write this Cell into the given buf's offset in a {@link KeyValue} format. * @param buf The buffer where to write the Cell. @@ -108,7 +122,8 @@ public interface ExtendedCell extends RawCell, HeapSize, Cloneable { /** * Extracts the id of the backing bytebuffer of this cell if it was obtained from fixed sized * chunks as in case of MemstoreLAB - * @return the chunk id if the cell is backed by fixed sized Chunks, else return -1 + * @return the chunk id if the cell is backed by fixed sized Chunks, else return + * {@link #CELL_NOT_BASED_ON_CHUNK}; i.e. -1. */ default int getChunkId() { return CELL_NOT_BASED_ON_CHUNK; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index 2246002eea..11745a4d96 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -50,8 +50,7 @@ public final class ByteBufferUtils { public final static int NEXT_BIT_MASK = 1 << 7; @VisibleForTesting final static boolean UNSAFE_AVAIL = UnsafeAvailChecker.isAvailable(); - @VisibleForTesting - final static boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); + public final static boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); private ByteBufferUtils() { } @@ -630,6 +629,8 @@ public final class ByteBufferUtils { } public static int compareTo(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { + // NOTE: This method is copied over in BBKVComparator!!!!! For perf reasons. If you make + // changes here, make them there too!!!! if (UNSAFE_UNALIGNED) { long offset1Adj, offset2Adj; Object refObj1 = null, refObj2 = null; @@ -671,9 +672,12 @@ public final class ByteBufferUtils { return compareTo(buf1, o1, l1, buf2, o2, l2) == 0; } + // The below two methods show up in lots of places. Versions of them in commons util and in + // Cassandra. In guava too? They are copied from ByteBufferUtils. They are here as static + // privates. Seems to make code smaller and make Hotspot happier (comes of compares and study + // of compiled code via jitwatch). + public static int compareTo(byte [] buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { - // This method is nearly same as the compareTo that follows but hard sharing code given - // byte array and bytebuffer types and this is a hot code path if (UNSAFE_UNALIGNED) { long offset2Adj; Object refObj2 = null; @@ -738,7 +742,7 @@ public final class ByteBufferUtils { long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + (long) i); long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + (long) i); if (lw != rw) { - if (!UnsafeAccess.littleEndian) { + if (!UnsafeAccess.LITTLE_ENDIAN) { return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? 
-1 : 1; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 6eb09c11df..15facea307 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1549,7 +1549,7 @@ public class Bytes implements Comparable { long lw = theUnsafe.getLong(buffer1, offset1Adj + i); long rw = theUnsafe.getLong(buffer2, offset2Adj + i); if (lw != rw) { - if(!UnsafeAccess.littleEndian) { + if(!UnsafeAccess.LITTLE_ENDIAN) { return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java index 486f81beb6..953ad5b533 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java @@ -42,7 +42,7 @@ public final class UnsafeAccess { /** The offset to the first element in a byte array. */ public static final long BYTE_ARRAY_BASE_OFFSET; - static final boolean littleEndian = ByteOrder.nativeOrder() + public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() .equals(ByteOrder.LITTLE_ENDIAN); // This number limits the number of bytes to copy per call to Unsafe's @@ -81,7 +81,7 @@ public final class UnsafeAccess { * @return the short value */ public static short toShort(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(theUnsafe.getShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -95,7 +95,7 @@ public final class UnsafeAccess { * @return the int value */ public static int toInt(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(theUnsafe.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -109,7 +109,7 @@ public final class UnsafeAccess { * @return the long value */ public static long toLong(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(theUnsafe.getLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -125,7 +125,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putShort(byte[] bytes, int offset, short val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Short.reverseBytes(val); } theUnsafe.putShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -140,7 +140,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putInt(byte[] bytes, int offset, int val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Integer.reverseBytes(val); } theUnsafe.putInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -155,7 +155,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putLong(byte[] bytes, int offset, long val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Long.reverseBytes(val); } theUnsafe.putLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -172,7 +172,7 @@ public final class UnsafeAccess { * @return short value at offset */ public static short toShort(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(getAsShort(buf, 
offset)); } return getAsShort(buf, offset); @@ -186,7 +186,7 @@ public final class UnsafeAccess { * @return short value at offset */ public static short toShort(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(theUnsafe.getShort(ref, offset)); } return theUnsafe.getShort(ref, offset); @@ -214,7 +214,7 @@ public final class UnsafeAccess { * @return int value at offset */ public static int toInt(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(getAsInt(buf, offset)); } return getAsInt(buf, offset); @@ -228,7 +228,7 @@ public final class UnsafeAccess { * @return int value at offset */ public static int toInt(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(theUnsafe.getInt(ref, offset)); } return theUnsafe.getInt(ref, offset); @@ -256,7 +256,7 @@ public final class UnsafeAccess { * @return long value at offset */ public static long toLong(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(getAsLong(buf, offset)); } return getAsLong(buf, offset); @@ -270,7 +270,7 @@ public final class UnsafeAccess { * @return long value at offset */ public static long toLong(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(theUnsafe.getLong(ref, offset)); } return theUnsafe.getLong(ref, offset); @@ -297,7 +297,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putInt(ByteBuffer buf, int offset, int val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Integer.reverseBytes(val); } if (buf.isDirect()) { @@ -402,7 +402,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putShort(ByteBuffer buf, int offset, short val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Short.reverseBytes(val); } if (buf.isDirect()) { @@ -421,7 +421,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putLong(ByteBuffer buf, int offset, long val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Long.reverseBytes(val); } if (buf.isDirect()) { diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java index 706941154d..6443d84ebd 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java @@ -24,6 +24,8 @@ import static org.junit.Assert.assertFalse; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.ConcurrentSkipListMap; + import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.testclassification.MiscTests; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -67,9 +69,23 @@ public class TestByteBufferKeyValue { assertTrue(CellComparatorImpl.COMPARATOR.compare(cell1, cell3) < 0); Cell cell4 = getOffheapCell(row1, Bytes.toBytes("f"), qual2); assertTrue(CellComparatorImpl.COMPARATOR.compare(cell1, cell4) > 0); + BBKVComparator comparator = new BBKVComparator(null); + assertTrue(comparator.compare(cell1, cell2) < 0); + assertTrue(comparator.compare(cell1, cell3) < 0); + assertTrue(comparator.compare(cell1, cell4) > 0); + ByteBuffer buf = ByteBuffer.allocate(row1.length); + ByteBufferUtils.copyFromArrayToBuffer(buf, row1, 0, row1.length); + + 
ConcurrentSkipListMap<ByteBufferKeyValue, ByteBufferKeyValue> map =
+        new ConcurrentSkipListMap<>(comparator);
+    map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1);
+    map.put((ByteBufferKeyValue)cell2, (ByteBufferKeyValue)cell2);
+    map.put((ByteBufferKeyValue)cell3, (ByteBufferKeyValue)cell3);
+    map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1);
+    map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1);
   }

-  private Cell getOffheapCell(byte [] row, byte [] family, byte [] qualifier) {
+  private static Cell getOffheapCell(byte [] row, byte [] family, byte [] qualifier) {
     KeyValue kvCell = new KeyValue(row, family, qualifier, 0L, Type.Put, row);
     ByteBuffer buf = ByteBuffer.allocateDirect(kvCell.getBuffer().length);
     ByteBufferUtils.copyFromArrayToBuffer(buf, kvCell.getBuffer(), 0, kvCell.getBuffer().length);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 4c9f6f6d65..5bcaa172d1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -332,8 +332,8 @@ public class HFile {
       try {
         ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction());
       } catch (UnsupportedOperationException uoe) {
-        if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe);
-        else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path);
+        LOG.trace("Unable to set drop behind on {}", path, uoe);
+        LOG.debug("Unable to set drop behind on {}", path.getName());
       }
     }
     return new HFileWriterImpl(conf, cacheConf, path, ostream, comparator, fileContext);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java
index a4fe883668..94a256d934 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java
@@ -53,7 +53,7 @@ public class CellSet implements NavigableSet<Cell> {
   private final int numUniqueKeys;

   CellSet(final CellComparator c) {
-    this.delegatee = new ConcurrentSkipListMap<>(c);
+    this.delegatee = new ConcurrentSkipListMap<>(c.getSimpleComparator());
     this.numUniqueKeys = UNKNOWN_NUM_UNIQUES;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java
index 5577be47ad..44ba65f620 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java
@@ -46,7 +46,7 @@ import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFacto
 @InterfaceAudience.Private
 public class ChunkCreator {
   private static final Logger LOG = LoggerFactory.getLogger(ChunkCreator.class);
-  // monotonically increasing chunkid
+  // monotonically increasing chunkid. Starts at 1.
   private AtomicInteger chunkID = new AtomicInteger(1);
   // maps the chunk against the monotonically increasing chunk id.
We need to preserve the // natural ordering of the key diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index a1fcaf2493..1a4320d669 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -2533,8 +2533,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } MemStoreSize mss = this.memStoreSizing.getMemStoreSize(); LOG.info("Flushing " + + storesToFlush.size() + "/" + stores.size() + " column families," + - " memstore data size=" + StringUtils.byteDesc(mss.getDataSize()) + - " memstore heap size=" + StringUtils.byteDesc(mss.getHeapSize()) + + " dataSize=" + StringUtils.byteDesc(mss.getDataSize()) + + " heapSize=" + StringUtils.byteDesc(mss.getHeapSize()) + ((perCfExtras != null && perCfExtras.length() > 0)? perCfExtras.toString(): "") + ((wal != null) ? "" : "; WAL is null, using passed sequenceid=" + sequenceId)); } @@ -2724,10 +2724,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi long time = EnvironmentEdgeManager.currentTime() - startTime; MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize(); long memstoresize = this.memStoreSizing.getMemStoreSize().getDataSize(); - String msg = "Finished memstore flush;" - + " data size ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize() - + ", heap size ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize() - + ", currentsize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + String msg = "Finished flush of" + + " dataSize ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize() + + ", heapSize ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize() + + ", currentSize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + " for " + this.getRegionInfo().getEncodedName() + " in " + time + "ms, sequenceid=" + flushOpSeqId + ", compaction requested=" + compactionRequested + ((wal == null) ? "; wal=null" : ""); @@ -4140,7 +4140,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * @param cellItr * @param now */ - public void updateCellTimestamps(final Iterable> cellItr, final byte[] now) + private static void updateCellTimestamps(final Iterable> cellItr, final byte[] now) throws IOException { for (List cells: cellItr) { if (cells == null) continue; @@ -4195,12 +4195,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi requestFlush(); // Don't print current limit because it will vary too much. The message is used as a key // over in RetriesExhaustedWithDetailsException processing. - throw new RegionTooBusyException("Over memstore limit; regionName=" + + throw new RegionTooBusyException("Over memstore limit=" + + org.apache.hadoop.hbase.procedure2.util.StringUtils.humanSize(this.blockingMemStoreSize) + + ", regionName=" + (this.getRegionInfo() == null? "unknown": this.getRegionInfo().getEncodedName()) + - ", server=" + (this.getRegionServerServices() == null ? "unknown": - this.getRegionServerServices().getServerName()) + - ", blockingMemStoreSize=" + - org.apache.hadoop.hbase.procedure2.util.StringUtils.humanSize(blockingMemStoreSize)); + ", server=" + (this.getRegionServerServices() == null? 
"unknown": + this.getRegionServerServices().getServerName())); } } @@ -8504,4 +8504,5 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public void requestFlush(FlushLifeCycleTracker tracker) throws IOException { requestFlush0(tracker); } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 4232f83b1d..2526082139 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -24,6 +24,7 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -330,8 +331,7 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat } switch (inMemoryCompaction) { case NONE: - ms = ReflectionUtils.newInstance(DefaultMemStore.class, - new Object[]{conf, this.comparator}); + ms = ReflectionUtils.newInstance(DefaultMemStore.class, new Object[]{conf, this.comparator}); break; default: Class clz = conf.getClass(MEMSTORE_CLASS_NAME, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java index 0db0fd9c28..ac7223f2d8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java @@ -120,12 +120,13 @@ public class MemStoreLABImpl implements MemStoreLAB { */ @Override public Cell forceCopyOfBigCellInto(Cell cell) { - int size = KeyValueUtil.length(cell) + ChunkCreator.SIZEOF_CHUNK_HEADER; + int size = cell instanceof ExtendedCell? ((ExtendedCell)cell).getSerializedSize(): + KeyValueUtil.length(cell); + size += ChunkCreator.SIZEOF_CHUNK_HEADER; Preconditions.checkArgument(size >= 0, "negative size"); if (size <= dataChunkSize) { // Using copyCellInto for cells which are bigger than the original maxAlloc - Cell newCell = copyCellInto(cell, dataChunkSize); - return newCell; + return copyCellInto(cell, dataChunkSize); } else { Chunk c = getNewExternalChunk(size); int allocOffset = c.alloc(size); @@ -134,7 +135,8 @@ public class MemStoreLABImpl implements MemStoreLAB { } private Cell copyCellInto(Cell cell, int maxAlloc) { - int size = KeyValueUtil.length(cell); + int size = cell instanceof ExtendedCell? ((ExtendedCell)cell).getSerializedSize(): + KeyValueUtil.length(cell); Preconditions.checkArgument(size >= 0, "negative size"); // Callers should satisfy large allocations directly from JVM since they // don't cause fragmentation as badly. 
@@ -169,7 +171,7 @@ public class MemStoreLABImpl implements MemStoreLAB { * Clone the passed cell by copying its data into the passed buf and create a cell with a chunkid * out of it */ - private Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) { + private static Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) { int tagsLen = cell.getTagsLength(); if (cell instanceof ExtendedCell) { ((ExtendedCell) cell).write(buf, offset); @@ -255,8 +257,7 @@ public class MemStoreLABImpl implements MemStoreLAB { */ private Chunk getOrMakeChunk() { // Try to get the chunk - Chunk c; - c = currChunk.get(); + Chunk c = currChunk.get(); if (c != null) { return c; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java index 7727b108c6..24604d988e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java @@ -47,9 +47,11 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti * implementations: *
 * <ul>
 *   <li><em>defaultProvider</em> : whatever provider is standard for the hbase version. Currently
- *                  "filesystem"</li>
+ *                  "asyncfs"</li>
+ *   <li><em>asyncfs</em> : a provider that will run on top of an implementation of the Hadoop
+ *                  FileSystem interface via an asynchronous client.</li>
 *   <li><em>filesystem</em> : a provider that will run on top of an implementation of the Hadoop
- *                  FileSystem interface, normally HDFS.</li>
+ *                  FileSystem interface via HDFS's synchronous DFSClient.</li>
 *   <li><em>multiwal</em> : a provider that will use multiple "filesystem" wal instances per region
 *                  server.</li>
 * </ul>
-- 2.16.3