Index: src/java/org/apache/lucene/index/ByteBlockPool.java
===================================================================
--- src/java/org/apache/lucene/index/ByteBlockPool.java	(revision 999760)
+++ src/java/org/apache/lucene/index/ByteBlockPool.java	(working copy)
@@ -39,12 +39,12 @@
 import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
 import org.apache.lucene.util.ArrayUtil;
 
-final class ByteBlockPool {
+public final class ByteBlockPool {
 
-  abstract static class Allocator {
-    abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
-    abstract void recycleByteBlocks(List<byte[]> blocks);
-    abstract byte[] getByteBlock();
+  public abstract static class Allocator {
+    public abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
+    public abstract void recycleByteBlocks(List<byte[]> blocks);
+    public abstract byte[] getByteBlock();
   }
 
   public byte[][] buffers = new byte[10][];
Index: src/java/org/apache/lucene/util/BytesHash.java
===================================================================
--- src/java/org/apache/lucene/util/BytesHash.java	(revision 0)
+++ src/java/org/apache/lucene/util/BytesHash.java	(revision 0)
@@ -0,0 +1,377 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// nocommit -- move to util?
+import java.lang.reflect.Array;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.index.ByteBlockPool;
+
+/**
+ * Hashes BytesRefs.  A BytesRef must be no longer than
+ * {@link #BYTES_BLOCK_SIZE}-2 bytes in length.
+ *
+ * <p>NOTE: this class is meant only to be used internally
+ * by Lucene; it's only public so it can be shared across
+ * packages.  This means the API is freely subject to
+ * change, and, the class could be removed entirely, in any
+ * Lucene release.  Use directly at your own risk!
+ */
+
+// nocommit -- reuse Entry instances?
+public abstract class BytesHash<T extends BytesHash.Entry> {
+
+  // nocommit -- factor properly so the byte pool uses this
+  // NOT DW's
+  public final static int BYTES_BLOCK_SHIFT = 15;
+  public final static int BYTES_BLOCK_SIZE = 1 << BYTES_BLOCK_SHIFT;
+  public final static int BYTES_BLOCK_MASK = BYTES_BLOCK_SIZE - 1;
+
+  // nocommit -- reuse?
+  private static class ByteBlockAllocator extends ByteBlockPool.Allocator {
+    int blockUsedCount;
+
+    @Override
+    public byte[] getByteBlock() {
+      blockUsedCount++;
+      return new byte[BYTES_BLOCK_SIZE];
+    }
+
+    @Override
+    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+      blockUsedCount -= end-start;
+    }
+
+    public long ramBytesUsed() { // bytes in blocks handed out by getByteBlock() and not yet recycled
+      return (long) blockUsedCount * BYTES_BLOCK_SIZE; // widen first: the int product wraps at 65536 blocks (2 GB)
+    }
+
+    @Override
+    public void recycleByteBlocks(List<byte[]> blocks) {
+      blockUsedCount -= blocks.size();
+    }
+  }
+
+  public static class Entry {
+    public int bytesStart;
+  }
+
+  private final Class<T> cl;
+  public final ByteBlockPool pool;
+  private int hashSize = 4;
+  private int hashHalfSize = hashSize/2;
+  private int hashMask = hashSize-1;
+  private int count;
+  private int lastCount = -1;
+  private final ByteBlockAllocator allocator;
+  private T[] hash;
+
+  @SuppressWarnings("unchecked")
+  public BytesHash(Class<T> cl) {
+    this.cl = cl;
+    allocator = new ByteBlockAllocator();
+    pool = new ByteBlockPool(allocator);
+    hash = (T[]) Array.newInstance(cl, hashSize);
+  }
+
+  public int size() {
+    return count;
+  }
+
+  public BytesRef getBytes(T e) {
+    return deref(e.bytesStart, scratch1);
+  }
+
+  /** Destructive operation -- returns the internal hash array with all
+   *  Entry instances moved to its first size() slots, in arbitrary order */
+  public T[] compact() {
+    int upto = 0;
+    for(int i=0;i<hashSize;i++) {
+      if (hash[i] != null) {
+        if (upto < i) {
+          hash[upto] = hash[i];
+          hash[i] = null;
+        }
+        upto++;
+      }
+    }
+
+    assert upto == count;
+    lastCount = count;
+
+    return hash;
+  }
+
+  /** Destructive operation -- returns the internal hash array with all Entry instances sorted by comp in its first size() slots */
+  public T[] sort(Comparator<BytesRef>comp) {
+    compact();
+    quickSort(comp, hash, 0, count-1);
+    return hash;
+  }
+
+  void quickSort(Comparator<BytesRef> comp, T[] entries, int lo, int hi) {
+    if (lo >= hi)
+      return;
+    else if (hi == 1+lo) {
+      if (compare(comp, entries[lo], entries[hi]) > 0) {
+        final T tmp = entries[lo];
+        entries[lo] = entries[hi];
+        entries[hi] = tmp;
+      }
+      return;
+    }
+    int mid = (lo + hi) >>> 1;
+    if (compare(comp, entries[lo], entries[mid]) > 0) {
+      T tmp = entries[lo];
+      entries[lo] = entries[mid];
+      entries[mid] = tmp;
+    }
+
+    if (compare(comp, entries[mid], entries[hi]) > 0) {
+      T tmp = entries[mid];
+      entries[mid] = entries[hi];
+      entries[hi] = tmp;
+
+      if (compare(comp, entries[lo], entries[mid]) > 0) {
+        T tmp2 = entries[lo];
+        entries[lo] = entries[mid];
+        entries[mid] = tmp2;
+      }
+    }
+
+    int left = lo + 1;
+    int right = hi - 1;
+
+    if (left >= right)
+      return;
+
+    T partition = entries[mid];
+
+    for (; ;) {
+      while (compare(comp, entries[right], partition) > 0)
+        --right;
+
+      while (left < right && compare(comp, entries[left], partition) <= 0)
+        ++left;
+
+      if (left < right) {
+        T tmp = entries[left];
+        entries[left] = entries[right];
+        entries[right] = tmp;
+        --right;
+      } else {
+        break;
+      }
+    }
+
+    quickSort(comp, entries, lo, left);
+    quickSort(comp, entries, left + 1, hi);
+  }
+
+  private final BytesRef scratch1 = new BytesRef();
+  private final BytesRef scratch2 = new BytesRef();
+
+  private final BytesRef deref(int bytesStart, BytesRef b) {
+    b.bytes = pool.buffers[bytesStart >> BYTES_BLOCK_SHIFT];
+    int pos = bytesStart & BYTES_BLOCK_MASK;
+
+    if ((b.bytes[pos] & 0x80) == 0) {
+      // length is 1 byte
+      b.length = b.bytes[pos];
+      pos += 1;
+    } else {
+      // length is 2 bytes
+      b.length = (b.bytes[pos]&0x7f) + ((b.bytes[pos+1]&0xff)<<7);
+      pos += 2;
+    }
+    b.offset = pos;
+    return b;
+  }
+
+  private boolean equals(T e, BytesRef b) {
+    return deref(e.bytesStart, scratch1).bytesEquals(b);
+  }
+
+  private int compare(Comparator<BytesRef> comp, T e1, T e2) {
+    return comp.compare(deref(e1.bytesStart, scratch1),
+                        deref(e2.bytesStart, scratch2));
+  }
+
+  @SuppressWarnings("unchecked")
+  private boolean shrink(int targetSize) {
+
+    // Cannot use ArrayUtil.shrink because we require power
+    // of 2:
+    int newSize = hashSize;
+    while(newSize >= 8 && newSize/4 > targetSize) {
+      newSize /= 2;
+    }
+
+    if (newSize != hashSize) {
+      hashSize = newSize;
+      hash = (T[]) Array.newInstance(cl, hashSize);
+      hashHalfSize = newSize/2;
+      hashMask = newSize-1;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  public void clear() {
+    lastCount = count;
+    count = 0;
+    if (lastCount != -1) {
+      if (shrink(lastCount)) {
+        // shrink clears the hash entries
+        return;
+      }
+    }
+    Arrays.fill(hash, null);
+  }
+
+  public T add(BytesRef bytes) {
+    int code = 0;
+    final int end = bytes.offset + bytes.length;
+    // build hash
+    for(int i=bytes.offset;i<end;i++) {
+      code = 31*code + bytes.bytes[i];
+    }
+
+    // final position
+    int hashPos = code & hashMask;
+    T e = hash[hashPos];
+
+    if (e != null && !equals(e, bytes)) {
+      // Conflict: keep searching different locations in
+      // the hash table.
+      final int inc = ((code>>8)+code)|1;
+      do {
+        code += inc;
+        hashPos = code & hashMask;
+        e = hash[hashPos];
+      } while (e != null && !equals(e, bytes));
+    }
+
+    if (e == null) {
+      // new entry
+      final int len2 = 2+bytes.length;
+      if (len2 + pool.byteUpto > BYTES_BLOCK_SIZE) {
+        if (len2 > BYTES_BLOCK_SIZE) {
+          throw new IllegalArgumentException("bytes can be at most " + (BYTES_BLOCK_SIZE-2) + " in length; got " + bytes.length);
+        }
+        pool.nextBuffer();
+      }
+
+      e = newEntry();
+
+      final byte[] buffer = pool.buffer;
+      final int bufferUpto = pool.byteUpto;
+      e.bytesStart = bufferUpto + pool.byteOffset;
+
+      // We first encode the length, followed by the
+      // bytes.  Length is encoded as vInt, but will consume
+      // 1 or 2 bytes at most (we reject too-long terms,
+      // above).
+      if (bytes.length < 128) {
+        // 1 byte to store length
+        buffer[bufferUpto] = (byte) bytes.length;
+        pool.byteUpto += bytes.length + 1;
+        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+1, bytes.length);
+      } else {
+        // 2 byte to store length
+        buffer[bufferUpto] = (byte) (0x80 | (bytes.length & 0x7f));
+        buffer[bufferUpto+1] = (byte) ((bytes.length>>7) & 0xff);
+        pool.byteUpto += bytes.length + 2;
+        System.arraycopy(bytes.bytes, bytes.offset, buffer, bufferUpto+2, bytes.length);
+      }
+      assert hash[hashPos] == null;
+      hash[hashPos] = e;
+      count++;
+
+      if (count == hashHalfSize) {
+        rehash(2*hashSize);
+      }
+    }
+    return e;
+  }
+
+  /** Re-inserts every entry into a new table of newSize slots (must be a
+   *  power of 2); add() calls this when the hash becomes 50% occupied. */
+  void rehash(final int newSize) {
+
+    final int newMask = newSize-1;
+
+    @SuppressWarnings("unchecked")
+    T[] newHash = (T[]) Array.newInstance(cl, newSize);
+    for(int i=0;i<hashSize;i++) {
+      T e0 = hash[i];
+      if (e0 != null) {
+        int code;
+        final int start = e0.bytesStart & BYTES_BLOCK_MASK;
+        final byte[] bytes = pool.buffers[e0.bytesStart >> BYTES_BLOCK_SHIFT];
+        code = 0;
+
+        final int len;
+        int pos;
+        if ((bytes[start] & 0x80) == 0) {
+          // length is 1 byte
+          len = bytes[start];
+          pos = start+1;
+        } else {
+          len = (bytes[start]&0x7f) + ((bytes[start+1]&0xff)<<7);
+          pos = start+2;
+        }
+
+        final int endPos = pos+len;
+        while(pos < endPos) {
+          code = (code*31) + bytes[pos++];
+        }
+
+        int hashPos = code & newMask;
+        assert hashPos >= 0;
+        if (newHash[hashPos] != null) {
+          final int inc = ((code>>8)+code)|1;
+          do {
+            code += inc;
+            hashPos = code & newMask;
+          } while (newHash[hashPos] != null);
+        }
+        newHash[hashPos] = e0;
+      }
+    }
+
+    hashMask = newMask;
+    hash = newHash;
+    hashSize = newSize;
+    hashHalfSize = newSize >> 1;
+  }
+
+  protected abstract T newEntry();
+
+  public long ramBytesUsed() { // estimate: pool blocks + one reference per hash slot + per-entry object cost
+    return allocator.ramBytesUsed() + (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * hashSize + count * bytesPerEntry(); // long math: int product can wrap for huge tables
+  }
+
+  protected long bytesPerEntry() {
+    return RamUsageEstimator.NUM_BYTES_OBJ_HEADER + RamUsageEstimator.NUM_BYTES_INT;
+  }
+}
