Index: lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java (revision 1457389) +++ lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java (working copy) @@ -30,8 +30,8 @@ /** * {@link BytesRefHash} is a special purpose hash-map like data-structure * optimized for {@link BytesRef} instances. BytesRefHash maintains mappings of - * byte arrays to ordinal (Map<BytesRef,int>) storing the hashed bytes - * efficiently in continuous storage. The mapping to the ordinal is + * byte arrays to ids (Map<BytesRef,int>) storing the hashed bytes + * efficiently in continuous storage. The mapping to the id is * encapsulated inside {@link BytesRefHash} and is guaranteed to be increased * for each added {@link BytesRef}. * @@ -58,7 +58,7 @@ private int hashMask; private int count; private int lastCount = -1; - private int[] ords; + private int[] ids; private final BytesStartArray bytesStartArray; private Counter bytesUsed; @@ -86,8 +86,8 @@ hashHalfSize = hashSize >> 1; hashMask = hashSize - 1; this.pool = pool; - ords = new int[hashSize]; - Arrays.fill(ords, -1); + ids = new int[hashSize]; + Arrays.fill(ids, -1); this.bytesStartArray = bytesStartArray; bytesStart = bytesStartArray.init(); bytesUsed = bytesStartArray.bytesUsed() == null? Counter.newCounter() : bytesStartArray.bytesUsed(); @@ -104,26 +104,29 @@ } /** - * Populates and returns a {@link BytesRef} with the bytes for the given ord. + * Populates and returns a {@link BytesRef} with the bytes for the given + * bytesID. *
- * Note: the given ord must be a positive integer less that the current size ( - * {@link #size()}) - *
- * - * @param ord the ord - * @param ref the {@link BytesRef} to populate + * Note: the given bytesID must be a positive integer less that the current + * size ({@link #size()}) * - * @return the given BytesRef instance populated with the bytes for the given ord + * @param bytesID + * the id + * @param ref + * the {@link BytesRef} to populate + * + * @return the given BytesRef instance populated with the bytes for the given + * bytesID */ - public BytesRef get(int ord, BytesRef ref) { + public BytesRef get(int bytesID, BytesRef ref) { assert bytesStart != null : "bytesStart is null - not initialized"; - assert ord < bytesStart.length: "ord exceeds byteStart len: " + bytesStart.length; - pool.setBytesRef(ref, bytesStart[ord]); + assert bytesID < bytesStart.length: "bytesID exceeds byteStart len: " + bytesStart.length; + pool.setBytesRef(ref, bytesStart[bytesID]); return ref; } /** - * Returns the ords array in arbitrary order. Valid ords start at offset of 0 + * Returns the ids array in arbitrary order. Valid ids start at offset of 0 * and end at a limit of {@link #size()} - 1 ** Note: This is a destructive operation. {@link #clear()} must be called in @@ -131,13 +134,13 @@ *
*/ int[] compact() { - assert bytesStart != null : "Bytesstart is null - not initialized"; + assert bytesStart != null : "bytesStart is null - not initialized"; int upto = 0; for (int i = 0; i < hashSize; i++) { - if (ords[i] != -1) { + if (ids[i] != -1) { if (upto < i) { - ords[upto] = ords[i]; - ords[i] = -1; + ids[upto] = ids[i]; + ids[i] = -1; } upto++; } @@ -145,7 +148,7 @@ assert upto == count; lastCount = count; - return ords; + return ids; } /** @@ -170,25 +173,25 @@ @Override protected int compare(int i, int j) { - final int ord1 = compact[i], ord2 = compact[j]; - assert bytesStart.length > ord1 && bytesStart.length > ord2; - pool.setBytesRef(scratch1, bytesStart[ord1]); - pool.setBytesRef(scratch2, bytesStart[ord2]); + final int id1 = compact[i], id2 = compact[j]; + assert bytesStart.length > id1 && bytesStart.length > id2; + pool.setBytesRef(scratch1, bytesStart[id1]); + pool.setBytesRef(scratch2, bytesStart[id2]); return comp.compare(scratch1, scratch2); } @Override protected void setPivot(int i) { - final int ord = compact[i]; - assert bytesStart.length > ord; - pool.setBytesRef(pivot, bytesStart[ord]); + final int id = compact[i]; + assert bytesStart.length > id; + pool.setBytesRef(pivot, bytesStart[id]); } @Override protected int comparePivot(int j) { - final int ord = compact[j]; - assert bytesStart.length > ord; - pool.setBytesRef(scratch2, bytesStart[ord]); + final int id = compact[j]; + assert bytesStart.length > id; + pool.setBytesRef(scratch2, bytesStart[id]); return comp.compare(pivot, scratch2); } @@ -198,8 +201,8 @@ return compact; } - private boolean equals(int ord, BytesRef b) { - pool.setBytesRef(scratch1, bytesStart[ord]); + private boolean equals(int id, BytesRef b) { + pool.setBytesRef(scratch1, bytesStart[id]); return scratch1.bytesEquals(b); } @@ -213,8 +216,8 @@ if (newSize != hashSize) { bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * -(hashSize - newSize)); hashSize = newSize; - ords = new int[hashSize]; - Arrays.fill(ords, 
-1); + ids = new int[hashSize]; + Arrays.fill(ids, -1); hashHalfSize = newSize / 2; hashMask = newSize - 1; return true; @@ -237,7 +240,7 @@ // shrink clears the hash entries return; } - Arrays.fill(ords, -1); + Arrays.fill(ids, -1); } public void clear() { @@ -249,7 +252,7 @@ */ public void close() { clear(true); - ords = null; + ids = null; bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * -hashSize); } @@ -258,8 +261,8 @@ * * @param bytes * the bytes to hash - * @return the ord the given bytes are hashed if there was no mapping for the - * given bytes, otherwise(-(ord)-1). This guarantees
+ * @return the id the given bytes are hashed if there was no mapping for the
+ * given bytes, otherwise (-(id)-1). This guarantees
* that the return value will always be >= 0 if the given bytes
* haven't been hashed before.
*
@@ -289,8 +292,8 @@
* }
*
*
- * @return the ord the given bytes are hashed if there was no mapping for the
- * given bytes, otherwise (-(ord)-1). This guarantees
+ * @return the id the given bytes are hashed if there was no mapping for the
+ * given bytes, otherwise (-(id)-1). This guarantees
* that the return value will always be >= 0 if the given bytes
* haven't been hashed before.
*
@@ -303,7 +306,7 @@
final int length = bytes.length;
// final position
int hashPos = code & hashMask;
- int e = ords[hashPos];
+ int e = ids[hashPos];
if (e != -1 && !equals(e, bytes)) {
// Conflict: keep searching different locations in
// the hash table.
@@ -311,7 +314,7 @@
do {
code += inc;
hashPos = code & hashMask;
- e = ords[hashPos];
+ e = ids[hashPos];
} while (e != -1 && !equals(e, bytes));
}
@@ -355,8 +358,8 @@
System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto + 2,
length);
}
- assert ords[hashPos] == -1;
- ords[hashPos] = e;
+ assert ids[hashPos] == -1;
+ ids[hashPos] = e;
if (count == hashHalfSize) {
rehash(2 * hashSize, true);
@@ -365,13 +368,53 @@
}
return -(e + 1);
}
+
+ /**
+ * Returns the id of the given {@link BytesRef}, or {@code -1} if it has no mapping.
+ * The hash code used for the lookup is taken from {@code bytes.hashCode()}.
+ *
+ * @see #find(BytesRef, int)
+ */
+ public int find(BytesRef bytes) {
+ return find(bytes, bytes.hashCode());
+ }
+ /**
+ * Returns the id of the given {@link BytesRef} with a pre-calculated hash code.
+ * This lookup only reads the hash table; it never adds the given bytes.
+ * @param bytes
+ * the bytes to look for
+ * @param code
+ * the hash code of the given bytes
+ *
+ * @return the id of the given bytes, or {@code -1} if there is no mapping for the
+ * given bytes.
+ */
+ public int find(BytesRef bytes, int code) {
+ assert bytesStart != null : "bytesStart is null - not initialized";
+ // first slot to probe
+ int hashPos = code & hashMask;
+ int e = ids[hashPos];
+ if (e != -1 && !equals(e, bytes)) {
+ // Conflict: the slot holds different bytes. Keep probing
+ // with an odd step ('| 1') derived from the hash code.
+ final int inc = ((code >> 8) + code) | 1;
+ do {
+ code += inc;
+ hashPos = code & hashMask;
+ e = ids[hashPos];
+ } while (e != -1 && !equals(e, bytes));
+ }
+
+ // 'e' is either -1 (empty slot reached) or the id of the requested bytes.
+ return e;
+ }
+
public int addByPoolOffset(int offset) {
assert bytesStart != null : "Bytesstart is null - not initialized";
// final position
int code = offset;
int hashPos = offset & hashMask;
- int e = ords[hashPos];
+ int e = ids[hashPos];
if (e != -1 && bytesStart[e] != offset) {
// Conflict: keep searching different locations in
// the hash table.
@@ -379,7 +422,7 @@
do {
code += inc;
hashPos = code & hashMask;
- e = ords[hashPos];
+ e = ids[hashPos];
} while (e != -1 && bytesStart[e] != offset);
}
if (e == -1) {
@@ -391,8 +434,8 @@
}
e = count++;
bytesStart[e] = offset;
- assert ords[hashPos] == -1;
- ords[hashPos] = e;
+ assert ids[hashPos] == -1;
+ ids[hashPos] = e;
if (count == hashHalfSize) {
rehash(2 * hashSize, false);
@@ -412,7 +455,7 @@
final int[] newHash = new int[newSize];
Arrays.fill(newHash, -1);
for (int i = 0; i < hashSize; i++) {
- final int e0 = ords[i];
+ final int e0 = ids[i];
if (e0 != -1) {
int code;
if (hashOnData) {
@@ -453,8 +496,8 @@
}
hashMask = newMask;
- bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (-ords.length));
- ords = newHash;
+ bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (-ids.length));
+ ids = newHash;
hashSize = newSize;
hashHalfSize = newSize / 2;
}
@@ -469,25 +512,25 @@
bytesStart = bytesStartArray.init();
}
- if (ords == null) {
- ords = new int[hashSize];
+ if (ids == null) {
+ ids = new int[hashSize];
bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * hashSize);
}
}
/**
* Returns the bytesStart offset into the internally used
- * {@link ByteBlockPool} for the given ord
+ * {@link ByteBlockPool} for the given bytesID
*
- * @param ord
- * the ord to look up
+ * @param bytesID
+ * the id to look up; asserted to be {@code >= 0} and less than the current count
* @return the bytesStart offset into the internally used
- * {@link ByteBlockPool} for the given ord
+ * {@link ByteBlockPool} for the given bytesID
*/
- public int byteStart(int ord) {
- assert bytesStart != null : "Bytesstart is null - not initialized";
- assert ord >= 0 && ord < count : ord;
- return bytesStart[ord];
+ public int byteStart(int bytesID) {
+ assert bytesStart != null : "bytesStart is null - not initialized";
+ assert bytesID >= 0 && bytesID < count : bytesID;
+ return bytesStart[bytesID];
}
/**
Index: lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java (revision 1457389)
+++ lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java (working copy)
@@ -30,16 +30,11 @@
import org.junit.Before;
import org.junit.Test;
-/**
- *
- */
public class TestBytesRefHash extends LuceneTestCase {
BytesRefHash hash;
ByteBlockPool pool;
- /**
- */
@Override
@Before
public void setUp() throws Exception {
@@ -248,6 +243,43 @@
hash.reinit();
}
}
+
+ @Test
+ public void testFind() throws Exception {
+ BytesRef ref = new BytesRef();
+ BytesRef scratch = new BytesRef();
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ Set