Index: lucene/core/src/test/org/apache/lucene/util/TestCompressedDocIdSet.java
===================================================================
--- lucene/core/src/test/org/apache/lucene/util/TestCompressedDocIdSet.java	(revision 0)
+++ lucene/core/src/test/org/apache/lucene/util/TestCompressedDocIdSet.java	(working copy)
@@ -0,0 +1,139 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class TestCompressedDocIdSet extends LuceneTestCase {
+
+  private static FixedBitSet randomSet(int numBits, int numBitsSet) {
+    final FixedBitSet set = new FixedBitSet(numBits);
+    for (int i = 0; i < numBitsSet; ++i) {
+      while (true) {
+        final int o = random().nextInt(numBits);
+        if (!set.get(o)) {
+          set.set(o);
+          break;
+        }
+      }
+    }
+    return set;
+  }
+
+  private static FixedBitSet randomSet(int numBits, float percentSet) {
+    return randomSet(numBits, (int) (percentSet * numBits));
+  }
+
+  public void testAgainstFixedBitSet() throws IOException {
+    final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
+    for (float percentSet : new float[] {0f, 0.0001f, random().nextFloat() / 2, 0.9f}) {
+      final FixedBitSet set = randomSet(numBits, percentSet);
+      final CompressedDocIdSet copy = CompressedDocIdSet.copyOf(set.iterator());
+      assertEquals(numBits, set, copy);
+    }
+  }
+
+  public void assertEquals(int numBits, FixedBitSet ds1, CompressedDocIdSet ds2) throws IOException {
+    assertEquals(ds1.cardinality(), ds2.cardinality());
+
+    // nextDoc
+    DocIdSetIterator it1 = ds1.iterator();
+    DocIdSetIterator it2 = ds2.iterator();
+    assertEquals(it1.docID(), it2.docID());
+    for (int doc = it1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it1.nextDoc()) {
+      assertEquals(doc, it2.nextDoc());
+      assertEquals(it1.docID(), it2.docID());
+    }
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, it2.nextDoc());
+    assertEquals(it1.docID(), it2.docID());
+
+    // nextDoc / advance
+    it1 = ds1.iterator();
+    it2 = ds2.iterator();
+    for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS;) {
+      if (random().nextBoolean()) {
+        doc = it1.nextDoc();
+        assertEquals(doc, it2.nextDoc());
+        assertEquals(it1.docID(), it2.docID());
+      } else {
+        final int target = doc + 1 + random().nextInt(random().nextBoolean() ? 64 : numBits / 64);
+        doc = it1.advance(target);
+        assertEquals(doc, it2.advance(target));
+        assertEquals(it1.docID(), it2.docID());
+      }
+    }
+  }
+
+  public void testUnion() throws IOException {
+    final int numBits = _TestUtil.nextInt(random(), 100, 1 << 20);
+    final int numDocIdSets = _TestUtil.nextInt(random(), 0, 4);
+    final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
+    for (int i = 0; i < numDocIdSets; ++i) {
+      fixedSets.add(randomSet(numBits, random().nextFloat() / 16));
+    }
+    final List<CompressedDocIdSet> compressedSets = new ArrayList<CompressedDocIdSet>(numDocIdSets);
+    for (FixedBitSet set : fixedSets) {
+      compressedSets.add(CompressedDocIdSet.copyOf(set.iterator()));
+    }
+
+    final CompressedDocIdSet union = CompressedDocIdSet.union(compressedSets);
+    final FixedBitSet expected = new FixedBitSet(numBits);
+    for (DocIdSet set : fixedSets) {
+      final DocIdSetIterator it = set.iterator();
+      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+        expected.set(doc);
+      }
+    }
+    assertEquals(numBits, expected, union);
+  }
+
+  public void testIntersection() throws IOException {
+    final int numBits = _TestUtil.nextInt(random(), 100, 500);
+    final int numDocIdSets = _TestUtil.nextInt(random(), 1, 4);
+    final List<FixedBitSet> fixedSets = new ArrayList<FixedBitSet>(numDocIdSets);
+    for (int i = 0; i < numDocIdSets; ++i) {
+      fixedSets.add(randomSet(numBits, random().nextFloat()));
+    }
+    final List<CompressedDocIdSet> compressedSets = new ArrayList<CompressedDocIdSet>(numDocIdSets);
+    for (FixedBitSet set : fixedSets) {
+      compressedSets.add(CompressedDocIdSet.copyOf(set.iterator()));
+    }
+
+    final CompressedDocIdSet intersection = CompressedDocIdSet.intersect(compressedSets);
+    final FixedBitSet expected = new FixedBitSet(numBits);
+    expected.set(0, expected.length());
+    for (DocIdSet set : fixedSets) {
+      final DocIdSetIterator it = set.iterator();
+      int lastDoc = -1;
+      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+        expected.clear(lastDoc + 1, doc);
+        lastDoc = doc;
+      }
+      if (lastDoc + 1 < expected.length()) {
+        expected.clear(lastDoc + 1, expected.length());
+      }
+    }
+    assertEquals(numBits, expected, intersection);
+  }
+
+}

Property changes on: lucene/core/src/test/org/apache/lucene/util/TestCompressedDocIdSet.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java	(revision 1497809)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java	(working copy)
@@ -44,6 +44,7 @@
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.GrowableByteArrayDataOutput;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.packed.PackedInts;
 
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java	(revision 1497809)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java	(working copy)
@@ -1,56 +0,0 @@
-package org.apache.lucene.codecs.compressing;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ArrayUtil;
-
-/**
- * A {@link DataOutput} that can be used to build a byte[].
- */
-final class GrowableByteArrayDataOutput extends DataOutput {
-
-  byte[] bytes;
-  int length;
-
-  GrowableByteArrayDataOutput(int cp) {
-    this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
-    this.length = 0;
-  }
-
-  @Override
-  public void writeByte(byte b) throws IOException {
-    if (length >= bytes.length) {
-      bytes = ArrayUtil.grow(bytes);
-    }
-    bytes[length++] = b;
-  }
-
-  @Override
-  public void writeBytes(byte[] b, int off, int len) throws IOException {
-    final int newLength = length + len;
-    if (newLength > bytes.length) {
-      bytes = ArrayUtil.grow(bytes, newLength);
-    }
-    System.arraycopy(b, off, bytes, length, len);
-    length = newLength;
-  }
-
-}
Index: lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java	(revision 1497809)
+++ lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java	(working copy)
@@ -45,6 +45,7 @@
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.GrowableByteArrayDataOutput;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.packed.BlockPackedWriter;
Index: lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java	(revision 1497809)
+++ lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java	(working copy)
@@ -26,6 +26,7 @@
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.MutableBits;
 
@@ -166,7 +167,7 @@
       int c = 0;
       int end = bits.length;
       for (int i = 0; i < end; i++) {
-        c += BYTE_COUNTS[bits[i] & 0xFF];  // sum bits per byte
+        c += BitUtil.bitCount(bits[i]);  // sum bits per byte
       }
       count = c;
     }
@@ -179,30 +180,13 @@
     int c = 0;
     int end = bits.length;
     for (int i = 0; i < end; i++) {
-      c += BYTE_COUNTS[bits[i] & 0xFF];  // sum bits per byte
+      c += BitUtil.bitCount(bits[i]);  // sum bits per byte
     }
     return c;
   }
 
-  private static final byte[] BYTE_COUNTS = {  // table of bits/byte
-    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-  };
 
+
   private static String CODEC = "BitVector";
 
   // Version before version tracking was added:
@@ -294,7 +278,7 @@
         output.writeVInt(i-last);
         output.writeByte(bits[i]);
         last = i;
-        numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]);
+        numCleared -= (8-BitUtil.bitCount(bits[i]));
         assert numCleared >= 0 || (i == (bits.length-1) && numCleared == -(8-(size&7)));
       }
     }
@@ -399,7 +383,7 @@
     while (n>0) {
       last += input.readVInt();
       bits[last] = input.readByte();
-      n -= BYTE_COUNTS[bits[last] & 0xFF];
+      n -= BitUtil.bitCount(bits[last]);
       assert n >= 0;
     }          
   }
@@ -416,7 +400,7 @@
     while (numCleared>0) {
       last += input.readVInt();
       bits[last] = input.readByte();
-      numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF];
+      numCleared -= 8-BitUtil.bitCount(bits[last]);
       assert numCleared >= 0 || (last == (bits.length-1) && numCleared == -(8-(size&7)));
     }
   }
Index: lucene/core/src/java/org/apache/lucene/util/CompressedDocIdSet.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/CompressedDocIdSet.java	(revision 0)
+++ lucene/core/src/java/org/apache/lucene/util/CompressedDocIdSet.java	(working copy)
@@ -0,0 +1,499 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
+
+/**
+ * Compressed implementation of a {@link DocIdSet}.
+ * <p>This implementation uses a simplistic compression scheme that should work
+ * well with sparse doc id sets while being only slightly larger than a
+ * {@link FixedBitSet} for incompressible sets.</p>
+ * <p><b>Format</b>: The format is byte-aligned. An 8-bits word is either clean,
+ * meaning composed only of zeros, or dirty, meaning that it contains at least one
+ * bit set. The idea is to encode sequences of clean words using run-length
+ * encoding and to leave sequences of dirty words as-is.</p>
+ * <table>
+ *   <tr><th>Token</th><th>Clean length+</th><th>Dirty length+</th><th>Dirty words</th></tr>
+ *   <tr><td>1 byte</td><td>0-n bytes</td><td>0-n bytes</td><td>0-n bytes</td></tr>
+ * </table>
+ * <ul>
+ *   <li><b>Token</b> encodes the number of clean words minus 2 on bits 4-6
+ * and the number of dirty words minus 1 on bits 0-2. Bit 7 (resp. bit 3) is
+ * a continuation bit meaning that the clean (resp. dirty) length is
+ * incomplete and additional bytes need to be read.</li>
+ *   <li><b>Clean length+</b>: If clean length has its higher-order bit set,
+ * you need to read a {@link DataInput#readVInt() vint}, shift it by 3 bits on
+ * the left side and add it to the 3 bits which have been read in the token.</li>
+ *   <li><b>Dirty length+</b> works the same way as <b>Clean length+</b> but
+ * for the length of dirty words.</li>
+ *   <li><b>Dirty words</b> are the dirty words, there are <b>Dirty length</b>
+ * of them.</li>
+ * </ul>
+ * <p>This format cannot encode sequences of less than 2 clean words and 1 dirty
+ * word. The reason is that if you find a single clean word, you should rather
+ * encode it as a dirty word. This takes the same space as starting a new
+ * sequence (since you need one byte for the token) but will be lighter to
+ * decode. There is however an exception for the first sequence. Since the first
+ * sequence may start directly with a dirty word, the clean length is encoded
+ * directly, without subtracting 2.</p>
+ * <p>There is an additional restriction on the format: the sequence of dirty
+ * words must start and end with a non-null word and is not allowed to contain
+ * two consecutive null words. This restriction exists to make sure no space is
+ * wasted and to make sure iterators can read the next doc ID by reading at most
+ * 2 dirty words.</p>
+ */
+public final class CompressedDocIdSet extends DocIdSet {
+
+  private static final CompressedDocIdSet EMPTY = new CompressedDocIdSet(new byte[0]);
+
+  private static final Comparator<Iterator> SERIALIZED_LENGTH_COMPARATOR = new Comparator<Iterator>() {
+    @Override
+    public int compare(Iterator wi1, Iterator wi2) {
+      return wi1.in.length() - wi2.in.length();
+    }
+  };
+
+  /** Return a copy of the provided iterator. */
+  public static CompressedDocIdSet copyOf(DocIdSetIterator it) throws IOException {
+    Builder builder = new Builder();
+    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+      builder.add(doc);
+    }
+    return builder.build();
+  }
+
+  /**
+   * Compute the intersection of the provided sets. This method is much faster than
+   * computing the intersection manually since it operates directly at the byte level.
+   */
+  public static CompressedDocIdSet intersect(Collection<CompressedDocIdSet> docIdSets) {
+    switch (docIdSets.size()) {
+      case 0:
+        throw new IllegalArgumentException("There must be at least one set to intersect");
+      case 1:
+        return docIdSets.iterator().next();
+    }
+    // The logic below is similar to ConjunctionScorer
+    final int numSets = docIdSets.size();
+    final Iterator[] iterators = new Iterator[numSets];
+    int i = 0;
+    for (CompressedDocIdSet set : docIdSets) {
+      final Iterator it = set.iterator();
+      iterators[i++] = it;
+    }
+    Arrays.sort(iterators, SERIALIZED_LENGTH_COMPARATOR);
+    final WordBuilder builder = new WordBuilder();
+    int offset = 0;
+    main:
+    while (true) {
+      // Advance the least costly iterator first
+      iterators[0].advanceWord(offset);
+      offset = iterators[0].offset;
+      if (offset == DocIdSetIterator.NO_MORE_DOCS) {
+        break;
+      }
+      byte word = iterators[0].word;
+      for (i = 1; i < numSets; ++i) {
+        if (iterators[i].offset < offset) {
+          iterators[i].advanceWord(offset);
+        }
+        if (iterators[i].offset > offset) {
+          offset = iterators[i].offset;
+          continue main;
+        }
+        assert iterators[i].offset == offset;
+        word &= iterators[i].word;
+        if (word == 0) {
+          // There are common words, but they don't share any bit
+          ++offset;
+          continue main;
+        }
+      }
+      // Found a common word
+      assert word != 0;
+      builder.addWord(offset, word);
+      ++offset;
+    }
+    return builder.build();
+  }
+
+  /**
+   * Compute the union of the provided sets. This method is much faster than
+   * computing the union manually since it operates directly at the byte level.
+   */
+  public static CompressedDocIdSet union(Collection<CompressedDocIdSet> docIdSets) {
+    switch (docIdSets.size()) {
+      case 0:
+        return EMPTY;
+      case 1:
+        return docIdSets.iterator().next();
+    }
+    // The logic below is very similar to DisjunctionScorer
+    final int numSets = docIdSets.size();
+    final PriorityQueue<Iterator> iterators = new PriorityQueue<CompressedDocIdSet.Iterator>(numSets) {
+      @Override
+      protected boolean lessThan(Iterator a, Iterator b) {
+        return a.offset < b.offset;
+      }
+    };
+    for (CompressedDocIdSet set : docIdSets) {
+      Iterator iterator = set.iterator();
+      iterator.nextWord();
+      iterators.add(iterator);
+    }
+
+    Iterator top = iterators.top();
+    if (top.offset == Integer.MAX_VALUE) {
+      return EMPTY;
+    }
+    int offset = top.offset;
+    byte word = top.word;
+    final WordBuilder builder = new WordBuilder();
+    while (true) {
+      top.nextWord();
+      iterators.updateTop();
+      top = iterators.top();
+      if (top.offset == offset) {
+        word |= top.word;
+      } else {
+        builder.addWord(offset, word);
+        if (top.offset == Integer.MAX_VALUE) {
+          break;
+        }
+        offset = top.offset;
+        word = top.word;
+      }
+    }
+    return builder.build();
+  }
+
+  static int wordNum(int docID) {
+    assert docID >= 0;
+    return docID >>> 3;
+  }
+
+  /** Word-based builder. */
+  static class WordBuilder {
+
+    final GrowableByteArrayDataOutput out;
+    final GrowableByteArrayDataOutput dirtyWords;
+    int clean;
+    private int lastWordNum;
+
+    WordBuilder() {
+      out = new GrowableByteArrayDataOutput(1024);
+      dirtyWords = new GrowableByteArrayDataOutput(128);
+      clean = 0;
+      lastWordNum = -1;
+    }
+
+    void writeHeader(int cleanLength) throws IOException {
+      final int cleanLengthMinus2 = cleanLength - 2;
+      final int dirtyLengthMinus1 = dirtyWords.length - 1;
+      assert cleanLengthMinus2 >= 0;
+      assert dirtyLengthMinus1 >= 0;
+      int token = ((cleanLengthMinus2 & 0x07) << 4) | (dirtyLengthMinus1 & 0x07);
+      if (cleanLengthMinus2 > 0x07) {
+        token |= 1 << 7;
+      }
+      if (dirtyLengthMinus1 > 0x07) {
+        token |= 1 << 3;
+      }
+      out.writeByte((byte) token);
+      if (cleanLengthMinus2 > 0x07) {
+        out.writeVInt(cleanLengthMinus2 >>> 3);
+      }
+      if (dirtyLengthMinus1 > 0x07) {
+        out.writeVInt(dirtyLengthMinus1 >>> 3);
+      }
+    }
+
+    void writeSequence(int cleanLength) {
+      try {
+        writeHeader(cleanLength);
+        out.writeBytes(dirtyWords.bytes, dirtyWords.length);
+      } catch (IOException cannotHappen) {
+        throw new AssertionError(cannotHappen);
+      }
+      dirtyWords.length = 0;
+    }
+
+    void addWord(int wordNum, byte word) {
+      assert wordNum > lastWordNum;
+      assert word != 0;
+
+      if (lastWordNum == -1) {
+        clean = 2 + wordNum; // special case for the 1st sequence
+        dirtyWords.writeByte(word);
+      } else {
+        switch (wordNum - lastWordNum) {
+          case 1:
+            dirtyWords.writeByte(word);
+            break;
+          case 2:
+            dirtyWords.writeByte((byte) 0);
+            dirtyWords.writeByte(word);
+            break;
+          default:
+            writeSequence(clean);
+            clean = wordNum - lastWordNum - 1;
+            dirtyWords.writeByte(word);
+        }
+      }
+      lastWordNum = wordNum;
+    }
+
+    /** Build a new {@link CompressedDocIdSet}. */
+    public CompressedDocIdSet build() {
+      if (lastWordNum == -1) {
+        return EMPTY;
+      }
+      writeSequence(clean);
+      final byte[] data = Arrays.copyOf(out.bytes, out.length);
+      return new CompressedDocIdSet(data);
+    }
+
+  }
+
+  /** A builder for {@link CompressedDocIdSet}s. */
+  public static final class Builder extends WordBuilder {
+
+    private int lastDocID;
+    private int wordNum, word;
+
+    /** Sole constructor */
+    public Builder() {
+      super();
+      lastDocID = -1;
+      wordNum = -1;
+      word = 0;
+    }
+
+    /** Add a document to this builder. Documents must be added in order. */
+    public void add(int docID) {
+      if (docID <= lastDocID) {
+        throw new IllegalArgumentException("Doc ids must be added in-order, got " + docID + " which is <= lastDocID=" + lastDocID);
+      }
+      final int wordNum = wordNum(docID);
+      if (this.wordNum == -1) {
+        this.wordNum = wordNum;
+        word = 1 << (docID & 0x07);
+      } else if (wordNum == this.wordNum) {
+        word |= 1 << (docID & 0x07);
+      } else {
+        addWord(this.wordNum, (byte) word);
+        this.wordNum = wordNum;
+        word = 1 << (docID & 0x07);
+      }
+      lastDocID = docID;
+    }
+
+    @Override
+    public CompressedDocIdSet build() {
+      if (this.wordNum != -1) {
+        addWord(wordNum, (byte) word);
+      }
+      return super.build();
+    }
+
+  }
+
+  private final byte[] data;
+
+  CompressedDocIdSet(byte[] data) {
+    this.data = data;
+  }
+
+  @Override
+  public boolean isCacheable() {
+    return true;
+  }
+
+  @Override
+  public Iterator iterator() {
+    return new Iterator(data);
+  }
+
+  static int readLength(ByteArrayDataInput in, int len) {
+    if ((len & 0x08) == 0) {
+      // no continuation bit
+      return len;
+    }
+    return (len & 0x07) | (in.readVInt() << 3);
+  }
+
+  static class Iterator extends DocIdSetIterator {
+
+    final ByteArrayDataInput in;
+    int dirtyLength;
+
+    int offset; // byte offset
+    byte word;
+
+    int docID;
+
+    Iterator(byte[] data) {
+      this.in = new ByteArrayDataInput(data);
+      offset = -1;
+      docID = -1;
+    }
+
+    private boolean readSequence() {
+      if (in.eof()) {
+        offset = Integer.MAX_VALUE;
+        return false;
+      }
+      final int token = in.readByte() & 0xFF;
+      final int cleanLength = (in.getPosition() == 1 ? 0 : 2) + readLength(in, token >>> 4);
+      offset += cleanLength;
+      dirtyLength = 1 + readLength(in, token & 0x0F);
+      return true;
+    }
+
+    void nextWord() {
+      if (dirtyLength == 0 && !readSequence()) {
+        return;
+      }
+      word = in.readByte();
+      if (word == 0) { // there can never be two consecutive null dirty words
+        word = in.readByte();
+        ++offset;
+        --dirtyLength;
+      }
+      ++offset;
+      --dirtyLength;
+    }
+
+    void advanceWord(int wordNum) {
+      assert wordNum > offset;
+      if (dirtyLength == 0 && !readSequence()) {
+        return;
+      }
+      while (true) {
+        final int delta = wordNum - offset;
+        if (delta <= 0) {
+          word = in.readByte(); // first dirty word, guaranteed to be non null
+          ++offset;
+          --dirtyLength;
+          break;
+        } else if (delta <= dirtyLength) {
+          offset = wordNum;
+          in.skipBytes(delta - 1);
+          dirtyLength -= delta;
+          word = in.readByte();
+          if (word == 0) {
+            word = in.readByte();
+            assert word != 0;
+            ++offset;
+            --dirtyLength;
+          }
+          break;
+        } else {
+          offset += dirtyLength;
+          in.skipBytes(dirtyLength);
+          dirtyLength = 0;
+          if (!readSequence()) {
+            break;
+          }
+        }
+      }
+    }
+
+    @Override
+    public int docID() {
+      return docID;
+    }
+
+    private int nextDocFromNextWord() {
+      nextWord();
+      if (offset == Integer.MAX_VALUE) {
+        return docID = NO_MORE_DOCS;
+      }
+      return docID = (offset << 3) + Integer.numberOfTrailingZeros(word & 0xFF);
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (docID == NO_MORE_DOCS) {
+        return NO_MORE_DOCS;
+      }
+
+      final int byteOffset = (docID & 0x07) + 1;
+      final int remainingBits = (word & 0xFF) >>> byteOffset;
+      if (remainingBits != 0) {
+        return docID += 1 + Integer.numberOfTrailingZeros(remainingBits);
+      }
+
+      return nextDocFromNextWord();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      assert target > docID;
+      final int targetWord = wordNum(target);
+      if (targetWord > offset) {
+        advanceWord(targetWord);
+        if (offset > targetWord) {
+          if (offset == Integer.MAX_VALUE) {
+            return docID = NO_MORE_DOCS;
+          }
+          return docID = (offset << 3) + Integer.numberOfTrailingZeros(word & 0xFF);
+        }
+      }
+      assert offset == targetWord;
+      final int byteOffset = target & 0x07;
+      final int remainingBits = (word & 0xFF) >>> byteOffset;
+      if (remainingBits != 0) {
+        return docID = target + Integer.numberOfTrailingZeros(remainingBits);
+      }
+
+      return nextDocFromNextWord();
+    }
+
+    @Override
+    public long cost() {
+      return in.length();
+    }
+
+  }
+
+  /** Return the number of documents in this {@link DocIdSet}. This method
+   *  runs in time linear with the compressed size, which is much faster than iterating over every doc ID. */
+  public int cardinality() {
+    int cardinality = 0;
+    for (Iterator it = iterator(); it.offset != Integer.MAX_VALUE; it.nextWord()) {
+      cardinality += BitUtil.bitCount(it.word);
+    }
+    return cardinality;
+  }
+
+  /** Return the memory usage of this class in bytes. */
+  public long ramBytesUsed() {
+    return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_OBJECT_REF) + RamUsageEstimator.sizeOf(data);
+  }
+
+}

Property changes on: lucene/core/src/java/org/apache/lucene/util/CompressedDocIdSet.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java	(working copy)
+++ lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java	(working copy)
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.compressing;
+package org.apache.lucene.util;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,26 +17,27 @@
  * limitations under the License.
  */
 
-import java.io.IOException;
-
 import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ArrayUtil;
 
 /**
  * A {@link DataOutput} that can be used to build a byte[].
+ * @lucene.internal
  */
-final class GrowableByteArrayDataOutput extends DataOutput {
+public final class GrowableByteArrayDataOutput extends DataOutput {
 
-  byte[] bytes;
-  int length;
+  /** The bytes */
+  public byte[] bytes;
+  /** The length */
+  public int length;
 
-  GrowableByteArrayDataOutput(int cp) {
+  /** Sole constructor */
+  public GrowableByteArrayDataOutput(int cp) {
     this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
     this.length = 0;
   }
 
   @Override
-  public void writeByte(byte b) throws IOException {
+  public void writeByte(byte b) {
     if (length >= bytes.length) {
       bytes = ArrayUtil.grow(bytes);
     }
@@ -44,7 +45,7 @@
   }
 
   @Override
-  public void writeBytes(byte[] b, int off, int len) throws IOException {
+  public void writeBytes(byte[] b, int off, int len) {
     final int newLength = length + len;
     if (newLength > bytes.length) {
       bytes = ArrayUtil.grow(bytes, newLength);
Index: lucene/core/src/java/org/apache/lucene/util/BitUtil.java
===================================================================
--- lucene/core/src/java/org/apache/lucene/util/BitUtil.java	(revision 1497809)
+++ lucene/core/src/java/org/apache/lucene/util/BitUtil.java	(working copy)
@@ -22,8 +22,32 @@
  */
 public final class BitUtil {
 
+  private static final byte[] BYTE_COUNTS = {  // table of bits/byte
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+  };
+
   private BitUtil() {} // no instance
 
+  /** Return the number of bits set in b. */
+  public static int bitCount(byte b) {
+    return BYTE_COUNTS[b & 0xFF];
+  }
+
   // The pop methods used to rely on bit-manipulation tricks for speed but it
   // turns out that it is faster to use the Long.bitCount method (which is an
   // intrinsic since Java 6u18) in a naive loop, see LUCENE-2221
