Index: src/main/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/KeyValue.java (revision 1354295) +++ src/main/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -131,6 +131,21 @@ return COMPARATOR.getRawComparator(); } + /** Size of the key length field in bytes*/ + public static final int KEY_LENGTH_SIZE = Bytes.SIZEOF_INT; + + /** Size of the key type field in bytes */ + public static final int TYPE_SIZE = Bytes.SIZEOF_BYTE; + + /** Size of the row length field in bytes */ + public static final int ROW_LENGTH_SIZE = Bytes.SIZEOF_SHORT; + + /** Size of the family length field in bytes */ + public static final int FAMILY_LENGTH_SIZE = Bytes.SIZEOF_BYTE; + + /** Size of the timestamp field in bytes */ + public static final int TIMESTAMP_SIZE = Bytes.SIZEOF_LONG; + // Size of the timestamp and type byte on end of a key -- a long + a byte. public static final int TIMESTAMP_TYPE_SIZE = Bytes.SIZEOF_LONG /* timestamp */ + @@ -1897,39 +1912,94 @@ return compare; } - // Compare column family. Start compare past row and family length. - int lcolumnoffset = Bytes.SIZEOF_SHORT + lrowlength + 1 + loffset; - int rcolumnoffset = Bytes.SIZEOF_SHORT + rrowlength + 1 + roffset; - int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE - - (lcolumnoffset - loffset); - int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE - - (rcolumnoffset - roffset); + // Compare the rest of the two KVs. This function will not compare rows + // anyway, so we don't need to tell it that the common prefix includes the + // row. + return compareWithoutRow(left, loffset, llength, right, roffset, rlength, + rrowlength); + } - // if row matches, and no column in the 'left' AND put type is 'minimum', - // then return that left is larger than right. + public int compare(byte[] left, byte[] right) { + return compare(left, 0, left.length, right, 0, right.length); + } - // This supports 'last key on a row' - the magic is if there is no column in the - // left operand, and the left operand has a type of '0' - magical value, - // then we say the left is bigger. This will let us seek to the last key in - // a row. + public int compareRows(byte [] left, int loffset, int llength, + byte [] right, int roffset, int rlength) { + return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); + } + protected int compareColumns( + byte [] left, int loffset, int llength, final int lfamilylength, + byte [] right, int roffset, int rlength, final int rfamilylength) { + return KeyValue.compareColumns(left, loffset, llength, lfamilylength, + right, roffset, rlength, rfamilylength); + } + + /** + * Compare columnFamily, qualifier, timestamp, and key type (everything + * except the row). Note that we are assuming that row portions of both KVs + * have already been parsed and found identical, and we don't validate that + * assumption here. + * @return comparison result. + */ + private int compareWithoutRow(byte[] left, int loffset, + int llength, byte[] right, int roffset, int rlength, short rowlength) { + /*** + * KeyValue Format and commonLength: + * |_keyLen_|_valLen_|_rowLen_|_rowKey_|_famiLen_|_fami_|_Quali_|.... + * ------------------|-------commonLength--------|-------------- + */ + int commonLength = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE + rowlength; + + // commonLength + TIMESTAMP_TYPE_SIZE + int commonLengthWithTSAndType = TIMESTAMP_TYPE_SIZE + commonLength; + // ColumnFamily + Qualifier length. + int lcolumnlength = llength - commonLengthWithTSAndType; + int rcolumnlength = rlength - commonLengthWithTSAndType; + byte ltype = left[loffset + (llength - 1)]; byte rtype = right[roffset + (rlength - 1)]; + // If the column is not specified, the "minimum" key type appears the + // latest in the sorted order, regardless of the timestamp. This is used + // for specifying the last key/value in a given row, because there is no + // "lexicographically last column" (it would be infinitely long). The + // "maximum" key type does not need this behavior. if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) { - return 1; // left is bigger. + // left is "bigger", i.e. it appears later in the sorted order + return 1; } if (rcolumnlength == 0 && rtype == Type.Minimum.getCode()) { return -1; } - // TODO the family and qualifier should be compared separately - compare = Bytes.compareTo(left, lcolumnoffset, lcolumnlength, right, - rcolumnoffset, rcolumnlength); - if (compare != 0) { - return compare; + int lfamilyoffset = commonLength + loffset; + int rfamilyoffset = commonLength + roffset; + + // Column family length. + int lfamilylength = left[lfamilyoffset - 1]; + int rfamilylength = right[rfamilyoffset - 1]; + // If left family size is not equal to right family size, we need not + // compare the qualifiers. + boolean sameFamilySize = (lfamilylength == rfamilylength); + if (!sameFamilySize) { + // comparing column family is enough. + return Bytes.compareTo(left, lfamilyoffset, lfamilylength, right, + rfamilyoffset, rfamilylength); } + // Compare family & qualifier together. + final int comparison = Bytes.compareTo(left, lfamilyoffset, + lcolumnlength, right, rfamilyoffset, rcolumnlength); + if (comparison != 0) { + return comparison; + } + return compareTimestampAndType(left, loffset, llength, right, roffset, + rlength, ltype, rtype); + } + private int compareTimestampAndType(byte[] left, int loffset, int llength, + byte[] right, int roffset, int rlength, byte ltype, byte rtype) { + int compare; if (!this.ignoreTimestamp) { // Get timestamps. long ltimestamp = Bytes.toLong(left, @@ -1944,28 +2014,14 @@ if (!this.ignoreType) { // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. return (0xff & rtype) - (0xff & ltype); } return 0; } - public int compare(byte[] left, byte[] right) { - return compare(left, 0, left.length, right, 0, right.length); - } - - public int compareRows(byte [] left, int loffset, int llength, - byte [] right, int roffset, int rlength) { - return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); - } - - protected int compareColumns( - byte [] left, int loffset, int llength, final int lfamilylength, - byte [] right, int roffset, int rlength, final int rfamilylength) { - return KeyValue.compareColumns(left, loffset, llength, lfamilylength, - right, roffset, rlength, rfamilylength); - } - int compareTimestamps(final long ltimestamp, final long rtimestamp) { // The below older timestamps sorting ahead of newer timestamps looks // wrong but it is intentional. This way, newer timestamps are first Index: src/test/java/org/apache/hadoop/hbase/TestKeyValue.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (revision 1354295) +++ src/test/java/org/apache/hadoop/hbase/TestKeyValue.java (working copy) @@ -339,6 +339,34 @@ assertTrue(cmp > 0); } + public void testCompareWithoutRow() { + final KVComparator c = KeyValue.COMPARATOR; + byte[] row = Bytes.toBytes("row"); + byte[] fami0 = Bytes.toBytes("fami"); + byte[] fami1 = Bytes.toBytes("fami1"); + byte[] fami2 = Bytes.toBytes("fami2"); + byte[] qual0 = Bytes.toBytes(""); + byte[] qual1 = Bytes.toBytes("qf1"); + byte[] qual2 = Bytes.toBytes("qf2"); + long ts = 1; + // 'fami1:' + KeyValue kv1_0 = new KeyValue(row, fami1, qual0, ts, Type.Put); + // 'fami1:qual1' + KeyValue kv1_1 = new KeyValue(row, fami1, qual1, ts, Type.Put); + // 'fami2:qual1' + KeyValue kv2_1 = new KeyValue(row, fami2, qual1, ts, Type.Put); + // 'fami:qf1' + KeyValue kv0_1 = new KeyValue(row, fami0, qual1, ts, Type.Put); + // 'fami:qf2' + KeyValue kv0_2 = new KeyValue(row, fami0, qual2, ts, Type.Put); + // 'fami:qf1' < 'fami:qf2' + assertKVLess(c, kv0_1, kv0_2); + // 'fami1:qual1' < 'fami2:qual1' + assertKVLess(c, kv1_1, kv2_1); + // 'fami:qf1' < 'fami1:' + assertKVLess(c, kv0_1, kv1_0); + } + public void testFirstLastOnRow() { final KVComparator c = KeyValue.COMPARATOR; long ts = 1;