Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestKVComparisonPerformance.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestKVComparisonPerformance.java (revision 0) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestKVComparisonPerformance.java (revision 0) @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.ArrayList; +import java.util.Random; +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.google.common.collect.Lists; + +/** + * Micro-benchmark for performance of KV comparison + */ +@Category(SmallTests.class) +public class TestKVComparisonPerformance { + + Random rng = new Random(); + + @Test + public void testPerformance() { + ArrayList kvs = Lists.newArrayList(); + + byte[] row = new byte[10]; + byte[] family = new byte[5]; + byte[] qualifier = new byte[5]; + byte[] value = new byte[20]; + + // Create 2M key values in an array + // Each pair of adjacent values differs only in their timestamp + // but are otherwise random. + for (int i = 0; i < 1000000; i++) { + rng.nextBytes(row); + rng.nextBytes(family); + rng.nextBytes(qualifier); + rng.nextBytes(value); + + long ts = 10; + KeyValue kv = new KeyValue(row, family, qualifier, + ts, KeyValue.Type.Put, value); + kvs.add(kv); + + ts += 15; + kv = new KeyValue(row, family, qualifier, + ts, KeyValue.Type.Put, value); + kvs.add(kv); + } + + int TRIALS = 100; + long sum = 0; + + for (int i = 0; i < TRIALS; i++) { + long st = System.nanoTime(); + int result = 0; + for (int j = 1; j < kvs.size(); j++) { + result += KeyValue.COMPARATOR.compare( + kvs.get(j-1), kvs.get(j)); + } + long et = System.nanoTime(); + long us = TimeUnit.MICROSECONDS.convert(et-st, TimeUnit.NANOSECONDS); + System.out.println("Run " + i + " took " + + us + "us"); + + if (i > 10) { + sum += us; + } + } + + System.out.println("Avg: " + (sum / (TRIALS - 10)) + "us"); + } +} Index: src/main/java/org/apache/hadoop/hbase/util/Bytes.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/Bytes.java (revision 1333101) +++ src/main/java/org/apache/hadoop/hbase/util/Bytes.java (working copy) @@ -33,7 +33,7 @@ import java.util.Comparator; import java.util.Iterator; -import org.apache.commons.lang.StringUtils; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -47,6 +47,9 @@ import sun.misc.Unsafe; import com.google.common.annotations.VisibleForTesting; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import com.google.common.primitives.Shorts; /** * Utility class that handles byte arrays, conversions to/from other types, @@ -480,7 +483,8 @@ * @return the long value */ public static long toLong(byte[] bytes) { - return toLong(bytes, 0, SIZEOF_LONG); + return LexicographicalComparerHolder.BEST_COMPARER + .toLong(bytes, 0); } /** @@ -492,7 +496,8 @@ * @return the long value */ public static long toLong(byte[] bytes, int offset) { - return toLong(bytes, offset, SIZEOF_LONG); + return LexicographicalComparerHolder.BEST_COMPARER + .toLong(bytes, offset); } /** @@ -570,7 +575,7 @@ * @return Float made from passed byte array. */ public static float toFloat(byte [] bytes, int offset) { - return Float.intBitsToFloat(toInt(bytes, offset, SIZEOF_INT)); + return Float.intBitsToFloat(toInt(bytes, offset)); } /** @@ -606,7 +611,7 @@ * @return Return double made from passed bytes. */ public static double toDouble(final byte [] bytes, final int offset) { - return Double.longBitsToDouble(toLong(bytes, offset, SIZEOF_LONG)); + return Double.longBitsToDouble(toLong(bytes, offset)); } /** @@ -652,7 +657,7 @@ * @return the int value */ public static int toInt(byte[] bytes) { - return toInt(bytes, 0, SIZEOF_INT); + return toInt(bytes, 0); } /** @@ -662,7 +667,8 @@ * @return the int value */ public static int toInt(byte[] bytes, int offset) { - return toInt(bytes, offset, SIZEOF_INT); + return LexicographicalComparerHolder.BEST_COMPARER + .toInt(bytes, offset); } /** @@ -727,7 +733,7 @@ * @return the short value */ public static short toShort(byte[] bytes) { - return toShort(bytes, 0, SIZEOF_SHORT); + return toShort(bytes, 0); } /** @@ -737,7 +743,8 @@ * @return the short value */ public static short toShort(byte[] bytes, int offset) { - return toShort(bytes, offset, SIZEOF_SHORT); + return LexicographicalComparerHolder.BEST_COMPARER + .toShort(bytes, offset); } /** @@ -967,6 +974,10 @@ } interface Comparer { + int toInt(byte[] buf, int offset); + short toShort(byte[] buf, int offset); + long toLong(byte[] buf, int offset); + abstract public int compareTo(T buffer1, int offset1, int length1, T buffer2, int offset2, int length2); } @@ -1031,6 +1042,30 @@ } return length1 - length2; } + + @Override + public int toInt(byte[] buf, int offset) { + return Ints.fromBytes( + buf[offset], buf[offset+1], buf[offset+2], buf[offset+3]); + } + + @Override + public short toShort(byte[] buf, int offset) { + return Shorts.fromBytes(buf[offset], buf[offset+1]); + } + + @Override + public long toLong(byte[] buf, int offset) { + return Longs.fromBytes( + buf[offset], + buf[offset+1], + buf[offset+2], + buf[offset+3], + buf[offset+4], + buf[offset+5], + buf[offset+6], + buf[offset+7]); + } } @VisibleForTesting @@ -1072,6 +1107,45 @@ static final boolean littleEndian = ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN); + @Override + public long toLong(byte[] buf, int off) { + @SuppressWarnings("unused") + int triggerBoundsCheck = buf[off + SIZEOF_LONG - 1]; + long result = theUnsafe.getLong(buf, + BYTE_ARRAY_BASE_OFFSET + (long)off); + if (!littleEndian) { + return result; + } else { + return Long.reverseBytes(result); + } + } + + @Override + public int toInt(byte[] buf, int off) { + @SuppressWarnings("unused") + int triggerBoundsCheck = buf[off + SIZEOF_INT - 1]; + int result = theUnsafe.getInt(buf, + BYTE_ARRAY_BASE_OFFSET + (long)off); + if (!littleEndian) { + return result; + } else { + return Integer.reverseBytes(result); + } + } + + @Override + public short toShort(byte[] buf, int off) { + @SuppressWarnings("unused") + int triggerBoundsCheck = buf[off + SIZEOF_SHORT - 1]; + short result = theUnsafe.getShort(buf, + BYTE_ARRAY_BASE_OFFSET + (long)off); + if (!littleEndian) { + return result; + } else { + return Short.reverseBytes(result); + } + } + /** * Returns true if x1 is less than x2, when both values are treated as * unsigned. @@ -1100,17 +1174,17 @@ length1 == length2) { return 0; } - int minLength = Math.min(length1, length2); - int minWords = minLength / SIZEOF_LONG; - int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET; - int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; + final int minLength = Math.min(length1, length2); + final int minWords = minLength >>> 3; // / SIZEOF_LONG; + final int offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET; + final int offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET; /* * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a * time is no slower than comparing 4 bytes at a time even on 32-bit. * On the other hand, it is substantially faster on 64-bit. */ - for (int i = 0; i < minWords * SIZEOF_LONG; i += SIZEOF_LONG) { + for (int i = 0; i < minWords << 3; i += SIZEOF_LONG) { long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); long diff = lw ^ rw; @@ -1118,34 +1192,15 @@ if (diff != 0) { if (!littleEndian) { return lessThanUnsigned(lw, rw) ? -1 : 1; - } - - // Use binary search - int n = 0; - int y; - int x = (int) diff; - if (x == 0) { - x = (int) (diff >>> 32); - n = 32; - } - - y = x << 16; - if (y == 0) { - n += 16; } else { - x = y; + return lessThanUnsigned(Long.reverseBytes(lw), + Long.reverseBytes(rw)) ? -1 : 1; } - - y = x << 8; - if (y == 0) { - n += 8; - } - return (int) (((lw >>> n) & 0xFFL) - ((rw >>> n) & 0xFFL)); } } // The epilogue to cover the last (minLength % 8) elements. - for (int i = minWords * SIZEOF_LONG; i < minLength; i++) { + for (int i = minWords << 3 ; i < minLength; i++) { int a = (buffer1[offset1 + i] & 0xff); int b = (buffer2[offset2 + i] & 0xff); if (a != b) { Index: src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (revision 1333101) +++ src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (working copy) @@ -220,10 +220,10 @@ int offset = kv.getOffset(); int initialOffset = offset; - int keyLength = Bytes.toInt(bytes, offset, Bytes.SIZEOF_INT); + int keyLength = Bytes.toInt(bytes, offset); offset += KeyValue.ROW_OFFSET; - short rowLength = Bytes.toShort(bytes, offset, Bytes.SIZEOF_SHORT); + short rowLength = Bytes.toShort(bytes, offset); offset += Bytes.SIZEOF_SHORT; int ret = this.rowComparator.compareRows(row, 0, row.length, Index: src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 1333101) +++ src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -4747,7 +4747,7 @@ if(kv.getValueLength() == 8){ byte [] buffer = kv.getBuffer(); int valueOffset = kv.getValueOffset(); - result += Bytes.toLong(buffer, valueOffset, Bytes.SIZEOF_LONG); + result += Bytes.toLong(buffer, valueOffset); } else{ wrongLength = true;