Index: src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java (revision 692702) +++ src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java (working copy) @@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.HBaseTestCase; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HStoreKey; /** * Test HStoreFile */ @@ -126,7 +127,7 @@ throws IOException { // Make a store file and write data to it. HStoreFile hsf = new HStoreFile(this.conf, this.fs, this.dir, - JenkinsHash.hash(Bytes.toBytes(getName())), + HRegionInfo.FIRST_META_REGIONINFO, Bytes.toBytes("colfamily"), 1234567890L, null); MapFile.Writer writer = hsf.getWriter(this.fs, SequenceFile.CompressionType.NONE, false, 0); @@ -145,7 +146,7 @@ midkey, HStoreFile.Range.top); HStoreFile refHsf = new HStoreFile(this.conf, this.fs, new Path(DIR, getName()), - JenkinsHash.hash(Bytes.toBytes(getName() + "_reference")), + HRegionInfo.FIRST_META_REGIONINFO, hsf.getColFamily(), 456, reference); // Assert that reference files are written and that we can write and // read the info reference file at least. Index: src/java/org/apache/hadoop/hbase/HStoreKey.java =================================================================== --- src/java/org/apache/hadoop/hbase/HStoreKey.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/HStoreKey.java (working copy) @@ -26,7 +26,8 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; /** * A Key for a stored row. */
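Note: .META. row keys have the form tablename,startkey,regionid, and a user start key may legitimately begin with a byte that sorts below the ',' delimiter, which is why a raw Bytes.compareTo mis-orders meta rows. A minimal sketch of the difference between the raw and the meta-aware compare; the sample keys and class name are made up for illustration:

    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.util.Bytes;

    public class MetaRowOrderSketch {
      public static void main(String[] args) {
        // Meta rows for one table: the first region has an empty start key;
        // the second region's start key "!row" begins with '!' (0x21, below ',' at 0x2C).
        byte [] first = Bytes.toBytes("TestTable,,1199");
        byte [] second = Bytes.toBytes("TestTable,!row,1200");
        // Raw byte order wrongly puts the "!row" region ahead of the first region.
        System.out.println(Bytes.compareTo(first, second));   // prints a value > 0
        // The meta-aware compare splits off the trailing region id and compares
        // the table+startkey prefix first, restoring the intended order.
        System.out.println(HStoreKey.compareTwoRowKeys(
            HRegionInfo.FIRST_META_REGIONINFO, first, second)); // prints a value < 0
      }
    }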
@@ -73,13 +81,37 @@ this(row, Long.MAX_VALUE); } + /** + * Create an HStoreKey specifying the row and a region info + * The column defaults to the empty string, the time stamp defaults to + * Long.MAX_VALUE + * + * @param row row key + * @param hri HRegionInfo + */ + public HStoreKey(final byte [] row, final HRegionInfo hri) { + this(row, HConstants.EMPTY_BYTE_ARRAY, hri); + } + /** * Create an HStoreKey specifying the row and timestamp * The column and table names default to the empty string * * @param row row key + * @param timestamp timestamp value + * @param hri HRegionInfo + */ + public HStoreKey(final byte [] row, long timestamp, final HRegionInfo hri) { + this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp, hri); + } + + /** + * Create an HStoreKey specifying the row and timestamp + * The column and table names default to the empty string + * + * @param row row key + * @param timestamp timestamp value + */ public HStoreKey(final byte [] row, long timestamp) { this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp); } @@ -188,7 +220,7 @@ * @param other the source key */ public HStoreKey(HStoreKey other) { - this(other.row, other.column, other.timestamp); + this(other.row, other.column, other.timestamp, other.regionInfo); } /** @@ -257,7 +289,7 @@ * @see #matchesRowFamily(HStoreKey) */ public boolean matchesRowCol(HStoreKey other) { - return Bytes.equals(this.row, other.row) && + return HStoreKey.equalsTwoRowKeys(this.regionInfo, this.row, other.row) && Bytes.equals(column, other.column); } @@ -271,7 +303,7 @@ * @see #matchesRowFamily(HStoreKey) */ public boolean matchesWithoutColumn(HStoreKey other) { - return Bytes.equals(this.row, other.row) && + return equalsTwoRowKeys(this.regionInfo, this.row, other.row) && this.timestamp >= other.getTimestamp(); } @@ -286,7 +318,7 @@ */ public boolean matchesRowFamily(HStoreKey that) { int delimiterIndex = getFamilyDelimiterIndex(this.column); - return Bytes.equals(this.row, that.row) && + return equalsTwoRowKeys(this.regionInfo, this.row, that.row) && Bytes.compareTo(this.column, 0, delimiterIndex, that.column, 0, delimiterIndex) == 0; } @@ -317,15 +349,19 @@ /** {@inheritDoc} */ public int compareTo(Object o) { - HStoreKey other = (HStoreKey)o; - int result = compareTwoRowKeys(this.regionInfo, this.row, other.row); + return compareTo(this.regionInfo, this, (HStoreKey)o); + } + + static int compareTo(final HRegionInfo hri, final HStoreKey left, + final HStoreKey right) { + int result = compareTwoRowKeys(hri, left.getRow(), right.getRow()); if (result != 0) { return result; } - result = this.column == null && other.column == null? 0: - this.column == null && other.column != null? -1: - this.column != null && other.column == null? 1: - Bytes.compareTo(this.column, other.column); + result = left.getColumn() == null && right.getColumn() == null? 0: + left.getColumn() == null && right.getColumn() != null? -1: + left.getColumn() != null && right.getColumn() == null? 1: + Bytes.compareTo(left.getColumn(), right.getColumn()); if (result != 0) { return result; } @@ -333,9 +369,9 @@ // wrong but it is intentional. This way, newer timestamps are first // found when we iterate over a memcache and newer versions are the // first we trip over when reading from a store file.
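+    // Worked example: for two keys that differ only in timestamp,
+    // compareTo(hri, new HStoreKey(row, col, 10L, hri), new HStoreKey(row, col, 5L, hri))
+    // returns -1, i.e. the key with the newer (larger) timestamp sorts first.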
- if (this.timestamp < other.timestamp) { + if (left.getTimestamp() < right.getTimestamp()) { result = 1; - } else if (this.timestamp > other.timestamp) { + } else if (left.getTimestamp() > right.getTimestamp()) { result = -1; } return result; @@ -479,12 +515,13 @@ byte[][] keysA = stripStartKeyMeta(rowA); byte[][] KeysB = stripStartKeyMeta(rowB); int rowCompare = Bytes.compareTo(keysA[0], KeysB[0]); - if(rowCompare == 0) + if (rowCompare == 0) { rowCompare = Bytes.compareTo(keysA[1], KeysB[1]); + } return rowCompare; - } else { - return Bytes.compareTo(rowA, rowB); } + return Bytes.compareTo(rowA, rowB); } /** @@ -513,10 +558,14 @@ break; } } - byte [] row = new byte[offset]; - System.arraycopy(rowKey, 0, row, 0,offset); - byte [] timestamp = new byte[rowKey.length - offset - 1]; - System.arraycopy(rowKey, offset+1, timestamp, 0,rowKey.length - offset - 1); + byte [] row = rowKey; + byte [] timestamp = HConstants.EMPTY_BYTE_ARRAY; + if (offset != -1) { + row = new byte[offset]; + System.arraycopy(rowKey, 0, row, 0, offset); + timestamp = new byte[rowKey.length - offset - 1]; + System.arraycopy(rowKey, offset + 1, timestamp, 0, rowKey.length - offset - 1); + } byte[][] elements = new byte[2][]; elements[0] = row; elements[1] = timestamp; @@ -538,4 +587,18 @@ this.column = Bytes.readByteArray(in); this.timestamp = in.readLong(); } + + /** A WritableComparator that takes the meta-region key encoding into account. */ + public static class HStoreKeyWritableComparator extends WritableComparator { + private final HRegionInfo hri; + + public HStoreKeyWritableComparator(final HRegionInfo hri) { + super(HStoreKey.class); + this.hri = hri; + } + + @Override + public int compare(final WritableComparable left, final WritableComparable right) { + return compareTo(this.hri, (HStoreKey)left, (HStoreKey)right); + } + } } Index: src/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- src/java/org/apache/hadoop/hbase/HConstants.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -92,6 +92,9 @@ /** Parameter name for how often threads should wake up */ static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency"; + + /** Parameter name for how often a region should perform a major compaction */ + static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction"; /** Parameter name for HBase instance root directory */ static final String HBASE_DIR = "hbase.rootdir"; Index: src/java/org/apache/hadoop/hbase/regionserver/Memcache.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/Memcache.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/Memcache.java (working copy) @@ -229,7 +229,7 @@ if (b == null) { return a; } - return Bytes.compareTo(a, b) <= 0? a: b; + return HStoreKey.compareTwoRowKeys(regionInfo, a, b) <= 0?
a: b; } /** @@ -259,12 +259,12 @@ synchronized (map) { // Make an HSK with maximum timestamp so we get past most of the current // rows cell entries. - HStoreKey hsk = new HStoreKey(row, HConstants.LATEST_TIMESTAMP); + HStoreKey hsk = new HStoreKey(row, HConstants.LATEST_TIMESTAMP, this.regionInfo); SortedMap tailMap = map.tailMap(hsk); // Iterate until we fall into the next row; i.e. move off current row for (Map.Entry es: tailMap.entrySet()) { HStoreKey itKey = es.getKey(); - if (Bytes.compareTo(itKey.getRow(), row) <= 0) { + if (HStoreKey.compareTwoRowKeys(regionInfo, itKey.getRow(), row) <= 0) { continue; } // Note: Not suppressing deletes or expired cells. @@ -330,7 +330,8 @@ } } } - } else if (Bytes.compareTo(key.getRow(), itKey.getRow()) < 0) { + } else if (HStoreKey.compareTwoRowKeys(regionInfo, key.getRow(), + itKey.getRow()) < 0) { break; } } @@ -377,8 +378,8 @@ final Set deletes) { // We want the earliest possible to start searching from. Start before // the candidate key in case it turns out a delete came in later. - HStoreKey search_key = candidateKeys.isEmpty()? new HStoreKey(row): - new HStoreKey(candidateKeys.firstKey().getRow()); + HStoreKey search_key = candidateKeys.isEmpty()? new HStoreKey(row, this.regionInfo): + new HStoreKey(candidateKeys.firstKey().getRow(), this.regionInfo); List victims = new ArrayList(); long now = System.currentTimeMillis(); @@ -469,7 +470,8 @@ // not a delete record. boolean deleted = HLogEdit.isDeleted(headMap.get(found_key)); if (lastRowFound != null && - !Bytes.equals(lastRowFound, found_key.getRow()) && !deleted) { + !HStoreKey.equalsTwoRowKeys(regionInfo, lastRowFound, + found_key.getRow()) && !deleted) { break; } // If this isn't a delete, record it as a candidate key. Also @@ -496,7 +498,7 @@ // smaller acceptable candidate keys would have caused us to start // our search earlier in the list, and we wouldn't be searching here. SortedMap thisRowTailMap = - headMap.tailMap(new HStoreKey(headMap.lastKey().getRow())); + headMap.tailMap(new HStoreKey(headMap.lastKey().getRow(), this.regionInfo)); Iterator key_iterator = thisRowTailMap.keySet().iterator(); do { HStoreKey found_key = key_iterator.next(); @@ -521,7 +523,7 @@ } static HStoreKey stripTimestamp(HStoreKey key) { - return new HStoreKey(key.getRow(), key.getColumn()); + return new HStoreKey(key.getRow(), key.getColumn(), key.getHRegionInfo()); } /* @@ -636,7 +638,8 @@ if (origin.getColumn() != null && origin.getColumn().length == 0) { // if the current and origin row don't match, then we can jump // out of the loop entirely. - if (!Bytes.equals(key.getRow(), origin.getRow())) { + if (!HStoreKey.equalsTwoRowKeys(regionInfo, key.getRow(), + origin.getRow())) { break; } // if the rows match but the timestamp is newer, skip it so we can
Index: src/java/org/apache/hadoop/hbase/regionserver/HStore.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HStore.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/HStore.java (working copy) @@ -188,7 +188,13 @@ } this.desiredMaxFileSize = maxFileSize; - this.majorCompactionTime = conf.getLong("hbase.hregion.majorcompaction", 86400000); + this.majorCompactionTime = conf.getLong(HConstants.MAJOR_COMPACTION_PERIOD, 86400000); + + String strCompactionTime = family.getValue(HConstants.MAJOR_COMPACTION_PERIOD); + if (strCompactionTime != null) { + this.majorCompactionTime = Long.parseLong(strCompactionTime); + } + this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10); this.storeSize = 0L; @@ -319,7 +325,8 @@ || !HStoreKey.matchingFamily(family.getName(), column)) { continue; } - HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp()); + HStoreKey k = new HStoreKey(key.getRow(), column, val.getTimestamp(), + this.info); reconstructedCache.put(k, val.getVal()); editsCount++; // Every 2k edits, tell the reporter we're making progress. @@ -390,7 +397,7 @@ if (isReference) { reference = HStoreFile.readSplitInfo(p, fs); } - curfile = new HStoreFile(conf, fs, basedir, info.getEncodedName(), + curfile = new HStoreFile(conf, fs, basedir, this.info, family.getName(), fid, reference); long storeSeqId = -1; try { @@ -424,7 +431,9 @@ // Try fixing this file.. if we can. Use the hbase version of fix. // Need to remove the old index file first else fix won't go ahead. this.fs.delete(new Path(mapfile, MapFile.INDEX_FILE_NAME), false); - long count = MapFile.fix(this.fs, mapfile, HStoreFile.HbaseMapFile.KEY_CLASS, + // TODO: This is going to fail if we are to rebuild a file from + // meta because it won't have right comparator: HBASE-848. + long count = MapFile.fix(this.fs, mapfile, HStoreKey.class, HStoreFile.HbaseMapFile.VALUE_CLASS, false, this.conf); if (LOG.isDebugEnabled()) { LOG.debug("Fixed index on " + mapfile.toString() + "; had " + @@ -589,7 +598,7 @@ long now = System.currentTimeMillis(); // A.
Write the Maps out to the disk HStoreFile flushedFile = new HStoreFile(conf, fs, basedir, - info.getEncodedName(), family.getName(), -1L, null); + this.info, family.getName(), -1L, null); MapFile.Writer out = flushedFile.getWriter(this.fs, this.compression, this.family.isBloomfilter(), cache.size()); out.setIndexInterval(family.getMapFileIndexInterval()); @@ -873,8 +882,7 @@ // Step through them, writing to the brand-new MapFile HStoreFile compactedOutputFile = new HStoreFile(conf, fs, - this.compactionDir, info.getEncodedName(), family.getName(), - -1L, null); + this.compactionDir, this.info, family.getName(), -1L, null); if (LOG.isDebugEnabled()) { LOG.debug("started compaction of " + rdrs.size() + " files into " + FSUtils.getPath(compactedOutputFile.getMapFilePath())); @@ -962,7 +970,7 @@ } } HStoreKey sk = keys[smallestKey]; - if (Bytes.equals(lastRow, sk.getRow()) + if (HStoreKey.equalsTwoRowKeys(info,lastRow, sk.getRow()) && Bytes.equals(lastColumn, sk.getColumn())) { timesSeen++; } else { @@ -1045,7 +1053,7 @@ try { // 1. Moving the new MapFile into place. HStoreFile finalCompactedFile = new HStoreFile(conf, fs, basedir, - info.getEncodedName(), family.getName(), -1, null); + this.info, family.getName(), -1, null); if (LOG.isDebugEnabled()) { LOG.debug("moving " + FSUtils.getPath(compactedFile.getMapFilePath()) + " to " + FSUtils.getPath(finalCompactedFile.getMapFilePath())); @@ -1207,7 +1215,7 @@ } } } - } else if (Bytes.compareTo(key.getRow(), readkey.getRow()) < 0) { + } else if (HStoreKey.compareTwoRowKeys(info,key.getRow(), readkey.getRow()) < 0) { // if we've crossed into the next row, then we can just stop // iterating break; @@ -1498,7 +1506,7 @@ } // If start row for this file is beyond passed in row, return; nothing // in here is of use to us. - if (Bytes.compareTo(startKey.getRow(), row) > 0) { + if (HStoreKey.compareTwoRowKeys(info,startKey.getRow(), row) > 0) { return; } long now = System.currentTimeMillis(); @@ -1531,7 +1539,7 @@ // up to the row before and return that. HStoreKey finalKey = getFinalKey(map); HStoreKey searchKey = null; - if (Bytes.compareTo(finalKey.getRow(), row) < 0) { + if (HStoreKey.compareTwoRowKeys(info,finalKey.getRow(), row) < 0) { searchKey = finalKey; } else { searchKey = new HStoreKey(row); @@ -1592,7 +1600,7 @@ do { // If we have an exact match on row, and it's not a delete, save this // as a candidate key - if (Bytes.equals(readkey.getRow(), searchKey.getRow())) { + if (HStoreKey.equalsTwoRowKeys(info,readkey.getRow(), searchKey.getRow())) { if (!HLogEdit.isDeleted(readval.get())) { if (handleNonDelete(readkey, now, deletes, candidateKeys)) { foundCandidate = true; @@ -1604,7 +1612,7 @@ if (deletedOrExpiredRow == null) { deletedOrExpiredRow = copy; } - } else if (Bytes.compareTo(readkey.getRow(), searchKey.getRow()) > 0) { + } else if (HStoreKey.compareTwoRowKeys(info,readkey.getRow(), searchKey.getRow()) > 0) { // if the row key we just read is beyond the key we're searching for, // then we're done. break; @@ -1685,9 +1693,9 @@ do { // if we have an exact match on row, and it's not a delete, save this // as a candidate key - if (Bytes.equals(readkey.getRow(), row)) { + if (HStoreKey.equalsTwoRowKeys(info,readkey.getRow(), row)) { handleKey(readkey, readval.get(), now, deletes, candidateKeys); - } else if (Bytes.compareTo(readkey.getRow(), row) > 0 ) { + } else if (HStoreKey.compareTwoRowKeys(info,readkey.getRow(), row) > 0 ) { // if the row key we just read is beyond the key we're searching for, // then we're done. 
break; @@ -1769,7 +1777,7 @@ } static HStoreKey stripTimestamp(HStoreKey key) { - return new HStoreKey(key.getRow(), key.getColumn()); + return new HStoreKey(key.getRow(), key.getColumn(), key.getHRegionInfo()); } /* @@ -1784,7 +1792,7 @@ // if the origin's column is empty, then we're matching any column if (Bytes.equals(origin.getColumn(), HConstants.EMPTY_BYTE_ARRAY)) { // if the row matches, then... - if (Bytes.equals(target.getRow(), origin.getRow())) { + if (HStoreKey.equalsTwoRowKeys(info, target.getRow(), origin.getRow())) { // check the timestamp return target.getTimestamp() <= origin.getTimestamp(); } @@ -1805,7 +1813,7 @@ // if the origin's column is empty, then we're matching any column if (Bytes.equals(origin.getColumn(), HConstants.EMPTY_BYTE_ARRAY)) { // if the row matches, then... - return Bytes.equals(target.getRow(), origin.getRow()); + return HStoreKey.equalsTwoRowKeys(info, target.getRow(), origin.getRow()); } // otherwise, we want to match on row and column return target.matchesRowCol(origin); @@ -1864,8 +1872,8 @@ if (mk != null) { // if the midkey is the same as the first and last keys, then we cannot // (ever) split this region. - if (Bytes.equals(mk.getRow(), firstKey.getRow()) && - Bytes.equals(mk.getRow(), lastKey.getRow())) { + if (HStoreKey.equalsTwoRowKeys(info, mk.getRow(), firstKey.getRow()) && + HStoreKey.equalsTwoRowKeys(info, mk.getRow(), lastKey.getRow())) { return null; } return new StoreSize(maxSize, mk.getRow()); @@ -1952,4 +1960,8 @@ return key; } } + + HRegionInfo getHRegionInfo() { + return this.info; + } } Index: src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (working copy) @@ -119,25 +119,28 @@ private final HBaseConfiguration conf; private final FileSystem fs; private final Reference reference; + private final HRegionInfo hri; /** * Constructor that fully initializes the object * @param conf Configuration object * @param basedir qualified path that is parent of region directory - * @param encodedRegionName file name friendly name of the region * @param colFamily name of the column family * @param fileId file identifier * @param ref Reference to another HStoreFile. + * @param hri The region info for this file (HACK HBASE-868). TODO: Fix. * @throws IOException */ HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir, - int encodedRegionName, byte [] colFamily, long fileId, - final Reference ref) throws IOException { + final HRegionInfo hri, byte [] colFamily, long fileId, + final Reference ref) + throws IOException { this.conf = conf; this.fs = fs; this.basedir = basedir; - this.encodedRegionName = encodedRegionName; + this.encodedRegionName = hri.getEncodedName(); this.colFamily = colFamily; + this.hri = hri; long id = fileId; if (id == -1) { @@ -431,7 +434,7 @@ "HStoreFile reference"); } return new BloomFilterMapFile.Writer(conf, fs, - getMapFilePath().toString(), compression, bloomFilter, nrows); + getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri); } /** @@ -584,7 +587,6 @@ * Hbase customizations of MapFile. 
*/ static class HbaseMapFile extends MapFile { - static final Class KEY_CLASS = HStoreKey.class; static final Class VALUE_CLASS = ImmutableBytesWritable.class; @@ -672,9 +674,10 @@ * @throws IOException */ public HbaseWriter(Configuration conf, FileSystem fs, String dirName, - SequenceFile.CompressionType compression) + SequenceFile.CompressionType compression, final HRegionInfo hri) throws IOException { - super(conf, fs, dirName, KEY_CLASS, VALUE_CLASS, compression); + super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), + VALUE_CLASS, compression); // Default for mapfiles is 128. Makes random reads faster if we // have more keys indexed and we're not 'next'-ing around in the // mapfile. @@ -788,14 +791,15 @@ * @param compression * @param filter * @param nrows + * @param hri * @throws IOException */ @SuppressWarnings("unchecked") public Writer(Configuration conf, FileSystem fs, String dirName, SequenceFile.CompressionType compression, final boolean filter, - int nrows) + int nrows, final HRegionInfo hri) throws IOException { - super(conf, fs, dirName, compression); + super(conf, fs, dirName, compression, hri); this.dirName = dirName; this.fs = fs; if (filter) { Index: src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (working copy) @@ -101,7 +101,7 @@ // Advance the readers to the first pos. for (i = 0; i < readers.length; i++) { - keys[i] = new HStoreKey(); + keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY, this.store.getHRegionInfo()); if (firstRow != null && firstRow.length != 0) { if (findFirstRow(i, firstRow)) { continue; } @@ -159,7 +159,8 @@ for (int i = 0; i < keys.length; i++) { // Fetch the data while ((keys[i] != null) - && (Bytes.compareTo(keys[i].getRow(), viableRow.getRow()) == 0)) { + && (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), viableRow.getRow()) == 0)) { // If we are doing a wild card match or there are multiple matchers // per column, we need to scan all the older versions of this row @@ -187,7 +190,8 @@ // Advance the current scanner beyond the chosen row, to // a valid timestamp, so we're ready next time. while ((keys[i] != null) - && ((Bytes.compareTo(keys[i].getRow(), viableRow.getRow()) <= 0) + && ((HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), viableRow.getRow()) <= 0) || (keys[i].getTimestamp() > this.timestamp) || (! 
columnMatch(i)))) { getNext(i); } @@ -246,8 +250,10 @@ // column matches and the timestamp of the row is less than or equal // to this.timestamp, so we do not need to test that here && ((viableRow == null) - || (Bytes.compareTo(keys[i].getRow(), viableRow) < 0) - || ((Bytes.compareTo(keys[i].getRow(), viableRow) == 0) + || (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), viableRow) < 0) + || ((HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), viableRow) == 0) && (keys[i].getTimestamp() > viableTimestamp)))) { if (ttl == HConstants.FOREVER || now < keys[i].getTimestamp() + ttl) { viableRow = keys[i].getRow(); @@ -273,7 +279,7 @@ private boolean findFirstRow(int i, final byte [] firstRow) throws IOException { ImmutableBytesWritable ibw = new ImmutableBytesWritable(); HStoreKey firstKey - = (HStoreKey)readers[i].getClosest(new HStoreKey(firstRow), ibw); + = (HStoreKey)readers[i].getClosest(new HStoreKey(firstRow, this.store.getHRegionInfo()), ibw); if (firstKey == null) { // Didn't find it. Close the scanner and return TRUE closeSubScanner(i); Index: src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/HStoreScanner.java (working copy) @@ -119,8 +119,10 @@ for (int i = 0; i < this.keys.length; i++) { if (scanners[i] != null && (chosenRow == null || - (Bytes.compareTo(keys[i].getRow(), chosenRow) < 0) || - ((Bytes.compareTo(keys[i].getRow(), chosenRow) == 0) && + (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), chosenRow) < 0) || + ((HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), chosenRow) == 0) && (keys[i].getTimestamp() > chosenTimestamp)))) { chosenRow = keys[i].getRow(); chosenTimestamp = keys[i].getTimestamp(); @@ -150,7 +152,8 @@ while ((scanners[i] != null && !filtered && moreToFollow) - && (Bytes.compareTo(keys[i].getRow(), chosenRow) == 0)) { + && (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), chosenRow) == 0)) { // If we are doing a wild card match or there are multiple // matchers per column, we need to scan all the older versions of // this row to pick up the rest of the family members @@ -165,7 +168,7 @@ // values with older ones. So now we only insert // a result if the map does not contain the key. HStoreKey hsk = new HStoreKey(key.getRow(), HConstants.EMPTY_BYTE_ARRAY, - key.getTimestamp()); + key.getTimestamp(), this.store.getHRegionInfo()); for (Map.Entry e : resultSets[i].entrySet()) { hsk.setColumn(e.getKey()); if (HLogEdit.isDeleted(e.getValue().getValue())) { @@ -202,7 +207,8 @@ // If the current scanner is non-null AND has a lower-or-equal // row label, then its timestamp is bad. We need to advance it.
while ((scanners[i] != null) && - (Bytes.compareTo(keys[i].getRow(), chosenRow) <= 0)) { + (HStoreKey.compareTwoRowKeys(store.getHRegionInfo(), + keys[i].getRow(), chosenRow) <= 0)) { resultSets[i].clear(); if (!scanners[i].next(keys[i], resultSets[i])) { closeScanner(i); Index: src/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -132,12 +134,14 @@ } // A's start key is null but B's isn't. Assume A comes before B } else if ((srcB.getStartKey() == null) // A is not null but B is - || (Bytes.compareTo(srcA.getStartKey(), srcB.getStartKey()) > 0)) { // A > B + || (HStoreKey.compareTwoRowKeys(srcA.getRegionInfo(), + srcA.getStartKey(), srcB.getStartKey()) > 0)) { // A > B a = srcB; b = srcA; } - if (!Bytes.equals(a.getEndKey(), b.getStartKey())) { + if (!HStoreKey.equalsTwoRowKeys(srcA.getRegionInfo(), + a.getEndKey(), b.getStartKey())) { throw new IOException("Cannot merge non-adjacent regions"); } return merge(a, b); @@ -181,13 +185,19 @@ HTableDescriptor tabledesc = a.getTableDesc(); HLog log = a.getLog(); Path basedir = a.getBaseDir(); - final byte [] startKey = Bytes.equals(a.getStartKey(), EMPTY_BYTE_ARRAY) || - Bytes.equals(b.getStartKey(), EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY : - Bytes.compareTo(a.getStartKey(), b.getStartKey()) <= 0 ? + final byte [] startKey = HStoreKey.equalsTwoRowKeys(a.getRegionInfo(), + a.getStartKey(), EMPTY_BYTE_ARRAY) || + HStoreKey.equalsTwoRowKeys(a.getRegionInfo(), + b.getStartKey(), EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY : + HStoreKey.compareTwoRowKeys(a.getRegionInfo(), a.getStartKey(), + b.getStartKey()) <= 0 ? a.getStartKey() : b.getStartKey(); - final byte [] endKey = Bytes.equals(a.getEndKey(), EMPTY_BYTE_ARRAY) || - Bytes.equals(b.getEndKey(), EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY : - Bytes.compareTo(a.getEndKey(), b.getEndKey()) <= 0 ? + final byte [] endKey = HStoreKey.equalsTwoRowKeys(a.getRegionInfo(), + a.getEndKey(), EMPTY_BYTE_ARRAY) || + HStoreKey.equalsTwoRowKeys(b.getRegionInfo(), b.getEndKey(), + EMPTY_BYTE_ARRAY) ? EMPTY_BYTE_ARRAY : + HStoreKey.compareTwoRowKeys(a.getRegionInfo(), a.getEndKey(), + b.getEndKey()) <= 0 ? b.getEndKey() : a.getEndKey(); HRegionInfo newRegionInfo = new HRegionInfo(tabledesc, startKey, endKey); @@ -232,7 +242,7 @@ } for (HStoreFile hsf: srcFiles) { HStoreFile dst = new HStoreFile(conf, fs, basedir, - newRegionInfo.getEncodedName(), colFamily, -1, null); + newRegionInfo, colFamily, -1, null); if (LOG.isDebugEnabled()) { LOG.debug("Renaming " + hsf + " to " + dst); } @@ -738,12 +748,12 @@ // Add start/end key checking: hbase-428. byte [] startKey = this.regionInfo.getStartKey(); byte [] endKey = this.regionInfo.getEndKey(); - if (Bytes.equals(startKey, midKey)) { + if (HStoreKey.equalsTwoRowKeys(this.regionInfo,startKey, midKey)) { LOG.debug("Startkey (" + startKey + ") and midkey + (" + midKey + ") are same, not splitting"); return null; } - if (Bytes.equals(midKey, endKey)) { + if (HStoreKey.equalsTwoRowKeys(this.regionInfo,midKey, endKey)) { LOG.debug("Endkey and midkey are same, not splitting"); return null; } @@ -789,15 +799,15 @@ // A reference to the bottom half of the hsf store file. 
HStoreFile.Reference aReference = new HStoreFile.Reference( this.regionInfo.getEncodedName(), h.getFileId(), - new HStoreKey(midKey), HStoreFile.Range.bottom); + new HStoreKey(midKey, this.regionInfo), HStoreFile.Range.bottom); HStoreFile a = new HStoreFile(this.conf, fs, splits, - regionAInfo.getEncodedName(), h.getColFamily(), -1, aReference); + regionAInfo, h.getColFamily(), -1, aReference); // Reference to top half of the hsf store file. HStoreFile.Reference bReference = new HStoreFile.Reference( this.regionInfo.getEncodedName(), h.getFileId(), - new HStoreKey(midKey), HStoreFile.Range.top); + new HStoreKey(midKey, this.regionInfo), HStoreFile.Range.top); HStoreFile b = new HStoreFile(this.conf, fs, splits, - regionBInfo.getEncodedName(), h.getColFamily(), -1, bReference); + regionBInfo, h.getColFamily(), -1, bReference); h.splitStoreFile(a, b, this.fs); } @@ -1163,7 +1173,7 @@ checkRow(row); checkColumn(column); // Don't need a row lock for a simple get - HStoreKey key = new HStoreKey(row, column, timestamp); + HStoreKey key = new HStoreKey(row, column, timestamp, this.regionInfo); return getStore(column).get(key, numVersions); } finally { splitsAndClosesLock.readLock().unlock(); } @@ -1195,7 +1205,7 @@ checkColumn(column); } } - HStoreKey key = new HStoreKey(row, ts); + HStoreKey key = new HStoreKey(row, ts, this.regionInfo); Integer lid = obtainRowLock(row); HashSet storeSet = new HashSet(); try { @@ -1259,14 +1269,14 @@ byte [] closestKey = store.getRowKeyAtOrBefore(row); // if it happens to be an exact match, we can stop looping if (HStoreKey.equalsTwoRowKeys(regionInfo, row, closestKey)) { - key = new HStoreKey(closestKey); + key = new HStoreKey(closestKey, this.regionInfo); break; } // otherwise, we need to check if it's the max and move to the next if (closestKey != null && (key == null || HStoreKey.compareTwoRowKeys( regionInfo, closestKey, key.getRow()) > 0) ) { - key = new HStoreKey(closestKey); + key = new HStoreKey(closestKey, this.regionInfo); } } if (key == null) { @@ -1388,7 +1398,8 @@ try { List deletes = null; for (BatchOperation op: b) { - HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime); + HStoreKey key = new HStoreKey(row, op.getColumn(), commitTime, + this.regionInfo); byte[] val = null; if (op.isPut()) { val = op.getValue(); @@ -1507,7 +1584,7 @@ long now = System.currentTimeMillis(); try { for (HStore store : stores.values()) { - List keys = store.getKeys(new HStoreKey(row, ts), + List keys = store.getKeys(new HStoreKey(row, ts, this.regionInfo), ALL_VERSIONS, now); TreeMap edits = new TreeMap(); for (HStoreKey key: keys) { @@ -1538,7 +1615,7 @@ // find the HStore for the column family HStore store = getStore(family); // find all the keys that match our criteria - List keys = store.getKeys(new HStoreKey(row, timestamp), + List keys = store.getKeys(new HStoreKey(row, timestamp, this.regionInfo), ALL_VERSIONS, now); // delete all the cells TreeMap edits = new TreeMap(); @@ -1566,7 +1643,7 @@ final long ts, final int versions) throws IOException { checkReadOnly(); - HStoreKey origin = new HStoreKey(row, column, ts); + HStoreKey origin = new HStoreKey(row, column, ts, this.regionInfo); Set keys = getKeys(origin, versions); if (keys.size() > 0) { TreeMap edits = new TreeMap(); @@ -1893,9 +2103,10 @@ this.resultSets = new TreeMap[scanners.length]; this.keys = new HStoreKey[scanners.length];
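+      // Each slot is seeded with a key carrying this region's HRegionInfo so
+      // that the lowest-key selection in next() below compares rows with the
+      // meta-aware HStoreKey.compareTwoRowKeys rather than raw byte order.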
for (int i = 0; i < scanners.length; i++) { - keys[i] = new HStoreKey(); + keys[i] = new HStoreKey(HConstants.EMPTY_BYTE_ARRAY,regionInfo); resultSets[i] = new TreeMap(Bytes.BYTES_COMPARATOR); if(scanners[i] != null && !scanners[i].next(keys[i], resultSets[i])) { + closeScanner(i); } } @@ -1911,16 +2122,17 @@ throws IOException { boolean moreToFollow = false; boolean filtered = false; - do { // Find the lowest-possible key. byte [] chosenRow = null; long chosenTimestamp = -1; for (int i = 0; i < this.keys.length; i++) { if (scanners[i] != null && - (chosenRow == null || - (Bytes.compareTo(keys[i].getRow(), chosenRow) < 0) || - ((Bytes.compareTo(keys[i].getRow(), chosenRow) == 0) && + (chosenRow == null || + (HStoreKey.compareTwoRowKeys(regionInfo, + keys[i].getRow(), chosenRow) < 0) || + ((HStoreKey.compareTwoRowKeys(regionInfo, keys[i].getRow(), + chosenRow) == 0) && (keys[i].getTimestamp() > chosenTimestamp)))) { chosenRow = keys[i].getRow(); chosenTimestamp = keys[i].getTimestamp(); @@ -1937,7 +2149,7 @@ for (int i = 0; i < scanners.length; i++) { if (scanners[i] != null && - Bytes.compareTo(keys[i].getRow(), chosenRow) == 0) { + HStoreKey.compareTwoRowKeys(regionInfo,keys[i].getRow(), chosenRow) == 0) { // NOTE: We used to do results.putAll(resultSets[i]); // but this had the effect of overwriting newer // values with older ones. So now we only insert @@ -1959,7 +2171,7 @@ // If the current scanner is non-null AND has a lower-or-equal // row label, then its timestamp is bad. We need to advance it. while ((scanners[i] != null) && - (Bytes.compareTo(keys[i].getRow(), chosenRow) <= 0)) { + (HStoreKey.compareTwoRowKeys(regionInfo,keys[i].getRow(), chosenRow) <= 0)) { resultSets[i].clear(); if (!scanners[i].next(keys[i], resultSets[i])) { closeScanner(i); @@ -2138,7 +2350,8 @@ byte [] row = r.getRegionName(); Integer lid = meta.obtainRowLock(row); try { - HStoreKey key = new HStoreKey(row, COL_REGIONINFO, System.currentTimeMillis()); + HStoreKey key = new HStoreKey(row, COL_REGIONINFO, + System.currentTimeMillis(), r.getRegionInfo()); TreeMap edits = new TreeMap(); edits.put(key, Writables.getBytes(r.getRegionInfo())); meta.update(edits); @@ -2239,9 +2452,9 @@ */ public static boolean rowIsInRange(HRegionInfo info, final byte [] row) { return ((info.getStartKey().length == 0) || - (Bytes.compareTo(info.getStartKey(), row) <= 0)) && + (HStoreKey.compareTwoRowKeys(info,info.getStartKey(), row) <= 0)) && ((info.getEndKey().length == 0) || - (Bytes.compareTo(info.getEndKey(), row) > 0)); + (HStoreKey.compareTwoRowKeys(info,info.getEndKey(), row) > 0)); } /** Index: src/java/org/apache/hadoop/hbase/HRegionInfo.java =================================================================== --- src/java/org/apache/hadoop/hbase/HRegionInfo.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/HRegionInfo.java (working copy) @@ -23,6 +23,7 @@ import java.io.DataOutput; import java.io.IOException; +import org.apache.hadoop.hbase.HStoreKey; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JenkinsHash; import org.apache.hadoop.io.VersionedWritable; @@ -397,12 +398,12 @@ } // Compare start keys. - result = Bytes.compareTo(this.startKey, other.startKey); + result = HStoreKey.compareTwoRowKeys(other, this.startKey, other.startKey); if (result != 0) { return result; } // Compare end keys. 
- return Bytes.compareTo(this.endKey, other.endKey); + return HStoreKey.compareTwoRowKeys(other, this.endKey, other.endKey); } } Index: src/java/org/apache/hadoop/hbase/master/MetaRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/master/MetaRegion.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/master/MetaRegion.java (working copy) @@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.master; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HStoreKey; import org.apache.hadoop.hbase.util.Bytes; @@ -90,7 +93,9 @@ public int compareTo(MetaRegion other) { int result = Bytes.compareTo(this.regionName, other.getRegionName()); if(result == 0) { - result = Bytes.compareTo(this.startKey, other.getStartKey()); + HRegionInfo hri = new HRegionInfo(HTableDescriptor.META_TABLEDESC, + HConstants.EMPTY_END_ROW, HConstants.EMPTY_END_ROW); + result = HStoreKey.compareTwoRowKeys(hri, this.startKey, other.getStartKey()); if (result == 0) { // Might be on different host? result = this.server.compareTo(other.server); Index: src/java/org/apache/hadoop/hbase/client/MetaScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/MetaScanner.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/client/MetaScanner.java (working copy) @@ -5,6 +5,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HStoreKey; import org.apache.hadoop.hbase.io.RowResult; import org.apache.hadoop.hbase.util.Bytes; @@ -47,6 +48,7 @@ HRegionInfo.createRegionName(tableName, null, ZEROES); // Scan over each meta region + HRegionInfo hri; do { ScannerCallable callable = new ScannerCallable(connection, META_TABLE_NAME, COLUMN_FAMILY_ARRAY, startRow, LATEST_TIMESTAMP, null); @@ -62,12 +64,13 @@ } while(visitor.processRow(r)); // Advance the startRow to the end key of the current region startRow = callable.getHRegionInfo().getEndKey(); + hri = new HRegionInfo(callable.getHRegionInfo()); } finally { // Close scanner callable.setClose(); connection.getRegionServerWithRetries(callable); } - } while (Bytes.compareTo(startRow, LAST_ROW) != 0); + } while (HStoreKey.compareTwoRowKeys(hri, startRow, LAST_ROW) != 0); } /** Index: src/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 692702) +++ src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -37,6 +37,7 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HServerAddress; +import org.apache.hadoop.hbase.HStoreKey; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.MasterNotRunningException; @@ -328,7 +329,7 @@ if (currentRegion != null) { byte[] endKey = currentRegion.getEndKey(); if (endKey == null || - Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY)) { + HStoreKey.equalsTwoRowKeys(currentRegion, endKey, HConstants.EMPTY_BYTE_ARRAY)) { // We have reached the end of the table and we're done break; } @@ -641,8 +642,10 @@ // this one. 
the exception case is when the endkey is EMPTY_START_ROW, // signifying that the region we're checking is actually the last // region in the table. - if (Bytes.equals(endKey, HConstants.EMPTY_END_ROW) || - Bytes.compareTo(endKey, row) > 0) { + if (HStoreKey.equalsTwoRowKeys(possibleRegion.getRegionInfo(), + endKey, HConstants.EMPTY_END_ROW) || + HStoreKey.compareTwoRowKeys(possibleRegion.getRegionInfo(), + endKey, row) > 0) { return possibleRegion; } } @@ -689,7 +692,8 @@ // by nature of the map, we know that the start key has to be < // otherwise it wouldn't be in the headMap. - if (Bytes.compareTo(endKey, row) <= 0) { + if (HStoreKey.compareTwoRowKeys(possibleRegion.getRegionInfo(), + endKey, row) <= 0) { // delete any matching entry HRegionLocation rl = tableLocations.remove(matchingRegions.lastKey());
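For reference, a minimal sketch of how the new comparator is meant to be wired up, mirroring the HbaseWriter change above. This is an illustration under assumptions, not part of the patch: the directory name and sample rows are made up, and the MapFile.Writer overload taking a WritableComparator is the stock Hadoop one the patch itself calls:

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HRegionInfo;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.MapFile;
    import org.apache.hadoop.io.SequenceFile;

    public class MetaComparatorWiringSketch {
      public static void main(String[] args) throws Exception {
        HBaseConfiguration conf = new HBaseConfiguration();
        FileSystem fs = FileSystem.getLocal(conf);
        HRegionInfo hri = HRegionInfo.FIRST_META_REGIONINFO;
        // Key the MapFile by the region-aware comparator instead of
        // HStoreKey's natural (raw-byte) ordering.
        MapFile.Writer writer = new MapFile.Writer(conf, fs, "/tmp/meta-sketch",
            new HStoreKey.HStoreKeyWritableComparator(hri),
            ImmutableBytesWritable.class, SequenceFile.CompressionType.NONE);
        // Appends must be ascending per the comparator; under meta ordering the
        // empty-start-key region really is first, even though '!' < ',' raw.
        writer.append(new HStoreKey(Bytes.toBytes("TestTable,,1199"), hri),
            new ImmutableBytesWritable(Bytes.toBytes("info for region one")));
        writer.append(new HStoreKey(Bytes.toBytes("TestTable,!row,1200"), hri),
            new ImmutableBytesWritable(Bytes.toBytes("info for region two")));
        writer.close();
      }
    }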