Index: hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java
===================================================================
--- hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (revision 1450060)
+++ hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.KeyValue.KeyComparator;
import org.apache.hadoop.hbase.KeyValue.MetaComparator;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.util.Bytes;
@@ -493,4 +494,47 @@
assertEquals(HConstants.LATEST_TIMESTAMP, time1);
assertEquals(12345L, time2);
}
+
+ public void testGetSmallestSeparator() {
+ final KeyComparator keyComparator = new KeyValue.KeyComparator();
+ final KeyValue firstOnRowA = KeyValue.createFirstOnRow(rowA);
+ final KeyValue lastOnRowA = KeyValue.createLastOnRow(rowA);
+ assertTrue(keyComparator.compare(firstOnRowA.getKey(), lastOnRowA.getKey()) < 0);
+ byte[] newKey = keyComparator.getSmallestSeparator(firstOnRowA.getKey(), lastOnRowA.getKey());
+ assertTrue(keyComparator.compare(firstOnRowA.getKey(), newKey) < 0);
+ assertTrue(keyComparator.compare(newKey, lastOnRowA.getKey()) <= 0);
+ //verify that faked shorter rowkey could be generated
+ long ts = 5;
+ KeyValue kv1 = new KeyValue(Bytes.toBytes("the quick brown fox"), family, qualA, ts,
+ Type.Put);
+ KeyValue kv2 = new KeyValue(Bytes.toBytes("the who test text"), family, qualA, ts,
+ Type.Put);
+ newKey = keyComparator.getSmallestSeparator(kv1.getKey(), kv2.getKey());
+ assertTrue(keyComparator.compare(kv1.getKey(), newKey) < 0);
+ assertTrue(keyComparator.compare(newKey, kv2.getKey()) <= 0);
+ short newRowLength = Bytes.toShort(newKey, 0);
+ byte[] expectedArray = Bytes.toBytes("the r");
+ Bytes.equals(newKey, KeyValue.ROW_LENGTH_SIZE, newRowLength, expectedArray, 0,
+ expectedArray.length);
+ //verify that faked ts could be generated
+ kv1 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, 5,
+ Type.Put);
+ kv2 = new KeyValue(Bytes.toBytes("ilovehbase"), family, qualA, 0,
+ Type.Put);
+ assertTrue(keyComparator.compare(kv1.getKey(), kv2.getKey()) < 0);
+ newKey = keyComparator.getSmallestSeparator(kv1.getKey(), kv2.getKey());
+ assertTrue(keyComparator.compare(kv1.getKey(), newKey) < 0);
+ assertTrue(keyComparator.compare(newKey, kv2.getKey()) <= 0);
+ long newTimestamp = Bytes.toLong(newKey, newKey.length - KeyValue.TIMESTAMP_TYPE_SIZE);
+ assertTrue(newTimestamp == 4);
+ //verify root/metaKeyComparator's getSmallestSeparator output
+ final KeyComparator rootKeyComparator = new KeyValue.RootKeyComparator();
+ final KeyComparator metaKeyComparator = new KeyValue.MetaKeyComparator();
+ newKey = rootKeyComparator.getSmallestSeparator(kv1.getKey(), kv2.getKey());
+ assertTrue(rootKeyComparator.compare(kv1.getKey(), newKey) < 0);
+ assertTrue(rootKeyComparator.compare(newKey, kv2.getKey()) == 0);
+ newKey = metaKeyComparator.getSmallestSeparator(kv1.getKey(), kv2.getKey());
+ assertTrue(metaKeyComparator.compare(kv1.getKey(), newKey) < 0);
+ assertTrue(metaKeyComparator.compare(newKey, kv2.getKey()) == 0);
+ }
}
\ No newline at end of file
Index: hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
===================================================================
--- hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java (revision 1450060)
+++ hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java (working copy)
@@ -26,6 +26,7 @@
import java.io.OutputStream;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
+import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
@@ -2495,6 +2496,16 @@
byte[] right, int roffset, int rlength) {
return Bytes.compareTo(left, loffset, llength, right, roffset, rlength);
}
+
+ /**
+ * @param leftKey
+ * @param rightKey
+ * Currently, let's return rightKey directly for meta/root key comparator
+ * @see {@link KeyComparator#getSmallestSeparator(byte[] leftKey, byte[] rightKey)}
+ */
+ public byte[] getSmallestSeparator(byte[] leftKey, byte[] rightKey) {
+ return Arrays.copyOf(rightKey, rightKey.length);
+ }
}
/**
@@ -2705,6 +2716,57 @@
}
return 0;
}
+
+ /**
+ * Generate a shorter byte array if possible, see HBASE-7845
+ * Else generate a smaller TS than leftKey, see HBASE-4443
+ * All of above need to ensure: leftKey < newKey <= rightKey
+ * Please use it on HFileV2 only(includeMemstoreTS is true by default)
+ * @return newKey: the new faked key
+ */
+ public byte[] getSmallestSeparator(byte[] leftKey, byte[] rightKey) {
+ if (rightKey == null) {
+ throw new IllegalArgumentException("rightKey can not be null");
+ }
+ if (leftKey == null) {
+ return Arrays.copyOf(rightKey, rightKey.length);
+ }
+
+ // Compare row
+ short leftRowLength = Bytes.toShort(leftKey, 0);
+ short rightRowLength = Bytes.toShort(rightKey, 0);
+ if (leftRowLength != rightRowLength || compareRows(leftKey, ROW_LENGTH_SIZE, leftRowLength,
+ rightKey, ROW_LENGTH_SIZE, rightRowLength) != 0) {
+ int minLength = Math.min(leftRowLength, rightRowLength);
+ int diffIdx = 0;
+ while (diffIdx < minLength && leftKey[ROW_LENGTH_SIZE + diffIdx] ==
+ rightKey[ROW_LENGTH_SIZE + diffIdx]) {
+ diffIdx++;
+ }
+ if (diffIdx >= minLength) {
+ // Do not shorten if is a prefix
+ return Arrays.copyOf(rightKey, rightKey.length);
+ }
+ int diffByte = leftKey[ROW_LENGTH_SIZE + diffIdx];
+ if ((0xff & diffByte) < 0xff && (diffByte + 1) <
+ (rightKey[ROW_LENGTH_SIZE + diffIdx] & 0xff)) {
+ byte[] newKey = new byte[ROW_LENGTH_SIZE + diffIdx + 1];
+ System.arraycopy(leftKey, 0, newKey, 0, ROW_LENGTH_SIZE + diffIdx);
+ newKey[ROW_LENGTH_SIZE + diffIdx] = (byte)(diffByte + 1);
+ return newKey;
+ }
+ return Arrays.copyOf(rightKey, rightKey.length);
+ }
+ // If the rows are equal, let's generate a smallert TS than leftKey's
+ long leftTimestamp = Bytes.toLong(leftKey, leftKey.length - TIMESTAMP_TYPE_SIZE);
+ if (leftTimestamp == HConstants.OLDEST_TIMESTAMP) {
+ return Arrays.copyOf(rightKey, rightKey.length);
+ }
+ long newTimestamp = leftTimestamp - 1;
+ byte[] newKey = Arrays.copyOf(leftKey, leftKey.length);
+ Bytes.putLong(newKey, leftKey.length - TIMESTAMP_TYPE_SIZE, newTimestamp);
+ return newKey;
+ }
}
/**
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java (revision 1450060)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java (working copy)
@@ -34,6 +34,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KeyComparator;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
@@ -77,6 +78,9 @@
/** The offset of the last data block or 0 if the file is empty. */
private long lastDataBlockOffset;
+ /** The last(stop) Key of the previous data block. */
+ private byte[] lastKeyOfPreviousBlock = null;
+
/** Additional data items to be written to the "load-on-open" section. */
private List additionalLoadOnOpenData =
new ArrayList();
@@ -169,12 +173,20 @@
// Update the last data block offset
lastDataBlockOffset = outputStream.getPos();
-
fsBlockWriter.writeHeaderAndData(outputStream);
-
int onDiskSize = fsBlockWriter.getOnDiskSizeWithHeader();
- dataBlockIndexWriter.addEntry(firstKeyInBlock, lastDataBlockOffset,
- onDiskSize);
+ // Generate a shorter faked key into index block. For example, consider a block boundary
+ // between the keys "the quick brown fox" and "the who test text". We can use "the r" as the
+ // key for the index block entry since it is > all entries in the previous block and <= all
+ // entries in subsequent blocks.
+ if (comparator instanceof KeyComparator && blockEncoder.getEncodingOnDisk() ==
+ DataBlockEncoding.NONE) {
+ byte[] shortestSeparator = ((KeyComparator) comparator).getSmallestSeparator(
+ lastKeyOfPreviousBlock, firstKeyInBlock);
+ dataBlockIndexWriter.addEntry(shortestSeparator, lastDataBlockOffset,onDiskSize);
+ } else {
+ dataBlockIndexWriter.addEntry(firstKeyInBlock, lastDataBlockOffset,onDiskSize);
+ }
totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
HFile.offerWriteLatency(System.nanoTime() - startTimeNs);
@@ -229,6 +241,10 @@
// This is where the next block begins.
fsBlockWriter.startWriting(BlockType.DATA);
firstKeyInBlock = null;
+ if (lastKeyLength > 0) {
+ lastKeyOfPreviousBlock = new byte[lastKeyLength];
+ System.arraycopy(lastKeyBuffer, lastKeyOffset, lastKeyOfPreviousBlock, 0, lastKeyLength);
+ }
}
/**
@@ -463,4 +479,4 @@
}
});
}
-}
\ No newline at end of file
+}