Index: src/test/java/org/apache/hadoop/hbase/TestKeyValue.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (revision 1043216) +++ src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (working copy) @@ -34,6 +34,14 @@ public class TestKeyValue extends TestCase { private final Log LOG = LogFactory.getLog(this.getClass().getName()); + public void testVersion() { + int expected = KeyValue.getVersion(KeyValue.VERSION_BITS); + KeyValue kv = new KeyValue(Bytes.toBytes("test"), + System.currentTimeMillis(), Type.Put); + int found = kv.getVersion(); + assertEquals(expected, found); + } + public void testColumnCompare() throws Exception { final byte [] a = Bytes.toBytes("aaa"); byte [] family1 = Bytes.toBytes("abc"); Index: src/main/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/KeyValue.java (revision 1043218) +++ src/main/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -36,7 +36,9 @@ import org.apache.hadoop.io.Writable; /** - * An HBase Key/Value. This is the fundamental HBase Type. + * An HBase Key/Value. This is the fundamental HBase Type. It is persisted into + * StoreFiles/HFiles in the FileSystem and is what a {@link org.apache.hadoop.hbase.client.Result} + * carries from server to HBase client. * *
If being used client-side, the primary methods to access individual fields
* are {@link #getRow()}, {@link #getFamily()}, {@link #getQualifier()},
@@ -54,7 +56,7 @@
* format inside a byte array is:
* <keylength> <valuelength> <key> <value>
* Key is further decomposed as:
- * <rowlength> <row> <columnfamilylength> <columnfamily> <columnqualifier> <timestamp> <keytype>
+ * <rowlength> <row> <columnfamilylength> <columnfamily> <columnqualifier> <timestamp> <insertionseqid> <keytype>
* The rowlength maximum is Short.MAX_SIZE,
* column family length maximum is
* Byte.MAX_SIZE, and column qualifier + key length must
@@ -63,8 +65,6 @@
*/
public class KeyValue implements Writable, HeapSize {
static final Log LOG = LogFactory.getLog(KeyValue.class);
- // TODO: Group Key-only comparators and operations into a Key class, just
- // for neatness sake, if can figure what to call it.
/**
* Colon character in UTF-8
@@ -72,7 +72,7 @@
public static final char COLUMN_FAMILY_DELIMITER = ':';
public static final byte[] COLUMN_FAMILY_DELIM_ARRAY =
- new byte[]{COLUMN_FAMILY_DELIMITER};
+ new byte[] {COLUMN_FAMILY_DELIMITER};
/**
* Comparator for plain key/values; i.e. non-catalog table key/values.
@@ -111,12 +111,8 @@
/**
* Get the appropriate row comparator for the specified table.
- *
- * Hopefully we can get rid of this, I added this here because it's replacing
- * something in HSK. We should move completely off of that.
- *
* @param tableName The table name.
- * @return The comparator.
+ * @return The comparator to use on this table.
*/
public static KeyComparator getRowComparator(byte [] tableName) {
if(Bytes.equals(HTableDescriptor.ROOT_TABLEDESC.getName(),tableName)) {
@@ -128,16 +124,18 @@
return COMPARATOR.getRawComparator();
}
- // Size of the timestamp and type byte on end of a key -- a long + a byte.
- public static final int TIMESTAMP_TYPE_SIZE =
+ // Size of the timestamp, insertion seqid long and the type byte on the end
+ // of the key portion of a KeyValue -- two longs and a byte.
+ public static final int TIMESTAMP_SEQID_TYPE_SIZE =
Bytes.SIZEOF_LONG /* timestamp */ +
+ Bytes.SIZEOF_LONG /* insertion seqid */ +
Bytes.SIZEOF_BYTE /*keytype*/;
- // Size of the length shorts and bytes in key.
+ // Size of the length shorts and bytes in a key.
public static final int KEY_INFRASTRUCTURE_SIZE =
Bytes.SIZEOF_SHORT /*rowlength*/ +
Bytes.SIZEOF_BYTE /*columnfamilylength*/ +
- TIMESTAMP_TYPE_SIZE;
+ TIMESTAMP_SEQID_TYPE_SIZE;
// How far into the key the row starts at. First thing to read is the short
// that says how long the row is.
@@ -149,9 +147,21 @@
public static final int KEYVALUE_INFRASTRUCTURE_SIZE = ROW_OFFSET;
/**
+ * Mask of the two version bits in a type byte; used so Types compare without regard to version.
+ */
+ private static final byte VERSION_MASK = (byte)192;
+ private static final byte VERSION_MASK_INVERSE = ~VERSION_MASK; // the low six (type code) bits
+
+ /**
+ * This KeyValue's version as bits that can be OR'd into a Type code (64, i.e. version 1).
+ */
+ static final byte VERSION_BITS = (byte)64;
+
+ /**
* Key type.
* Has space for other key types to be added later. Cannot rely on
* enum ordinals . They change if item is removed or moved. Do our own codes.
+ * Keys are versioned using the two most significant bits of the Type code.
*/
public static enum Type {
Minimum((byte)0),
@@ -161,6 +171,14 @@
DeleteColumn((byte)12),
DeleteFamily((byte)14),
+ // Bits 64 and 128 are reserved for specifying the KeyValue version. If the
+ // top two bits are zero, the version is 0. If the 7th bit -- 64 -- is set,
+ // the version is 1. Version is ignored when KVs are compared. We define
+ // Version below just to show that the bits are occupied. The version bit
+ // twiddling and compares are done elsewhere, outside of this enum. See the
+ // VERSION_MASK and VERSION_BITS defines above.
+ Version((byte)192),
+
// Maximum is used when searching; you look from maximum on down.
Maximum((byte)255);
@@ -182,11 +200,12 @@
*/
public static Type codeToType(final byte b) {
for (Type t : Type.values()) {
- if (t.getCode() == b) {
+ // Compare against the code returned by stripVersionFromType; version is not meant to take part.
+ if (t.getCode() == stripVersionFromType(b)) {
return t;
}
}
- throw new RuntimeException("Unknown code " + b);
+ throw new RuntimeException("Unknown code " + stripVersionFromType(b));
}
}
@@ -205,18 +224,6 @@
// the row cached
private byte [] rowCache = null;
-
- /** Here be dragons **/
-
- // used to achieve atomic operations in the memstore.
- public long getMemstoreTS() {
- return memstoreTS;
- }
-
- public void setMemstoreTS(long memstoreTS) {
- this.memstoreTS = memstoreTS;
- }
-
// default value is 0, aka DNC
private long memstoreTS = 0;
@@ -473,7 +480,9 @@
pos = Bytes.putBytes(bytes, pos, qualifier, qoffset, qlength);
}
pos = Bytes.putLong(bytes, pos, timestamp);
- pos = Bytes.putByte(bytes, pos, type.getCode());
+ // Put insertion seqid. TODO: FIX -- the timestamp is written here as a placeholder seqid.
+ pos = Bytes.putLong(bytes, pos, timestamp);
+ pos = Bytes.putByte(bytes, pos, addVersionToType(type.getCode()));
if (value != null && value.length > 0) {
pos = Bytes.putBytes(bytes, pos, value, voffset, vlength);
}
@@ -481,6 +490,20 @@
}
/**
+ * Stamp this KeyValue format's version onto the passed type code.
+ * @param type Type code; version bits already present on it are discarded.
+ * @return Type with Version added.
+ */
+ private static byte addVersionToType(final byte type) {
+ final int typeBits = type & VERSION_MASK_INVERSE; // blank out any existing version
+ return (byte)(typeBits | VERSION_BITS);
+ }
+
+ private static byte stripVersionFromType(final byte b) {
+ return (byte)(b & VERSION_MASK_INVERSE); // keep the six type-code bits, clear the two version bits
+ }
+
+ /**
* Write KeyValue format into a byte array.
*
* Takes column in the form family:qualifier
@@ -601,13 +624,13 @@
String row = Bytes.toStringBinary(b, o + Bytes.SIZEOF_SHORT, rowlength);
int columnoffset = o + Bytes.SIZEOF_SHORT + 1 + rowlength;
int familylength = b[columnoffset - 1];
- int columnlength = l - ((columnoffset - o) + TIMESTAMP_TYPE_SIZE);
+ int columnlength = l - ((columnoffset - o) + TIMESTAMP_SEQID_TYPE_SIZE);
String family = familylength == 0? "":
Bytes.toStringBinary(b, columnoffset, familylength);
String qualifier = columnlength == 0? "":
Bytes.toStringBinary(b, columnoffset + familylength,
columnlength - familylength);
- long timestamp = Bytes.toLong(b, o + (l - TIMESTAMP_TYPE_SIZE));
+ long timestamp = Bytes.toLong(b, o + (l - TIMESTAMP_SEQID_TYPE_SIZE));
byte type = b[o + l - 1];
// return row + "/" + family +
// (family != null && family.length() > 0? COLUMN_FAMILY_DELIMITER: "") +
@@ -644,6 +667,29 @@
return length;
}
+ public long getMemstoreTS() {
+ return memstoreTS; // used to achieve atomic operations in the memstore
+ }
+
+ public void setMemstoreTS(long memstoreTS) {
+ this.memstoreTS = memstoreTS; // overwrites the field, whose default value is 0
+ }
+
+ /**
+ * @return This instance's version, decoded from {@link #getType()} by {@link #getVersion(byte)}.
+ */
+ public int getVersion() {
+ return getVersion(getType());
+ }
+
+ /**
+ * @param type Type byte carrying a version in its two most significant bits.
+ * @return Version that is in passed type, in the range 0 to 3.
+ */
+ public static int getVersion(final byte type) {
+ return (type & VERSION_MASK & 0xFF) >>> 6; // 0xFF undoes the sign extension of the byte operands
+ }
+
//---------------------------------------------------------------------------
//
// Length and Offset Calculators
@@ -801,7 +847,7 @@
* @return Timestamp offset
*/
public int getTimestampOffset(final int keylength) {
- return getKeyOffset() + keylength - TIMESTAMP_TYPE_SIZE;
+ return getKeyOffset() + keylength - TIMESTAMP_SEQID_TYPE_SIZE; // timestamp, seqid and type end the key
}
/**
@@ -1872,9 +1918,9 @@
// Compare column family. Start compare past row and family length.
int lcolumnoffset = Bytes.SIZEOF_SHORT + lrowlength + 1 + loffset;
int rcolumnoffset = Bytes.SIZEOF_SHORT + rrowlength + 1 + roffset;
- int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE -
+ int lcolumnlength = llength - TIMESTAMP_SEQID_TYPE_SIZE -
(lcolumnoffset - loffset);
- int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE -
+ int rcolumnlength = rlength - TIMESTAMP_SEQID_TYPE_SIZE -
(rcolumnoffset - roffset);
// if row matches, and no column in the 'left' AND put type is 'minimum',
@@ -1905,9 +1951,9 @@
if (!this.ignoreTimestamp) {
// Get timestamps.
long ltimestamp = Bytes.toLong(left,
- loffset + (llength - TIMESTAMP_TYPE_SIZE));
+ loffset + (llength - TIMESTAMP_SEQID_TYPE_SIZE));
long rtimestamp = Bytes.toLong(right,
- roffset + (rlength - TIMESTAMP_TYPE_SIZE));
+ roffset + (rlength - TIMESTAMP_SEQID_TYPE_SIZE));
compare = compareTimestamps(ltimestamp, rtimestamp);
if (compare != 0) {
return compare;