Index: src/test/org/apache/lucene/util/TestBitVector.java
===================================================================
--- src/test/org/apache/lucene/util/TestBitVector.java (revision 738896)
+++ src/test/org/apache/lucene/util/TestBitVector.java (working copy)
@@ -19,7 +19,7 @@
 
 import java.io.IOException;
 
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 
@@ -201,7 +201,54 @@
             bv.write(d, "TESTBV");
         }
     }
+
     /**
+     * Test nextSetBit() with various bit-twiddling edge cases.
+     * @throws Exception
+     */
+    public void testNextSetBit() throws Exception {
+        for (int i = 24; i <= 33; i++) {
+            BitVector bitVec = new BitVector(64);
+            bitVec.set(i);
+            assertEquals("nextSetBit for 0 is " + i, i, bitVec.nextSetBit(0));
+            assertEquals("nextSetBit for 1 is " + i, i, bitVec.nextSetBit(1));
+            for (int probe = 15; probe <= i; probe++) {
+                assertEquals("nextSetBit for " + probe + " is " + i,
+                             i, bitVec.nextSetBit(probe));
+            }
+            for (int probe = i + 1; probe <= i + 9; probe++) {
+                assertEquals("no nextSetBit for " + probe + " when max is " + i,
+                             -1, bitVec.nextSetBit(probe));
+            }
+        }
+    }
+
+    public void testDocIdSet() throws IOException {
+        int[] ints = new int[] {0,1,2,3,5,8};
+        BitVector bv = new BitVector(9);
+        for (int x=0; x < ints.length; x++) {
+            bv.set(ints[x]);
+        }
+        tstDocIdSet(ints, bv);
+    }
+
+    private void tstDocIdSet(int[] ints, BitVector bitVector) throws IOException {
+        for (int i = 0; i < ints.length; i++) {
+            if ((i > 0) && (ints[i-1] == ints[i])) {
+                return; // DocNrSkipper should not skip to same document.
+            }
+        }
+        DocIdSetIterator m = bitVector.iterator();
+        for (int i = 0; i < ints.length; i++) {
+            assertTrue("No end of Matcher at: " + i, m.next());
+            assertEquals(ints[i], m.doc());
+        }
+        assertTrue("End of Matcher", (! m.next()));
+        // next() must keep returning false once the iterator is exhausted
+        assertTrue("Matcher stays exhausted", (! m.next()));
+    }
+
+    /**
      * Compare two BitVectors.
      * This should really be an equals method on the BitVector itself.
      * @param bv One bit vector
Index: src/java/org/apache/lucene/search/MultiDocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/MultiDocIdSet.java (revision 0)
+++ src/java/org/apache/lucene/search/MultiDocIdSet.java (revision 0)
@@ -0,0 +1,75 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+/**
+ * DocIdSet over multiple DocIdSets. The document numbers of the i-th
+ * sub-set are shifted by <code>starts[i]</code>; a <code>null</code>
+ * sub-set is treated as empty.
+ */
+
+public class MultiDocIdSet extends DocIdSet {
+  protected DocIdSet[] docIdSets;
+  protected int[] starts;
+
+  public MultiDocIdSet(int[] starts, DocIdSet[] docIdSets) throws IOException {
+    this.starts = starts;
+    this.docIdSets = docIdSets;
+  }
+
+  public DocIdSetIterator iterator() throws IOException {
+    return new MultiDocIdSetIterator();
+  }
+
+  private class MultiDocIdSetIterator extends DocIdSetIterator {
+    protected DocIdSetIterator[] docIdSetsIts;
+    protected DocIdSetIterator current;
+    protected int base = 0;
+    protected int pointer = 0;
+
+    public MultiDocIdSetIterator() throws IOException {
+      docIdSetsIts = new DocIdSetIterator[docIdSets.length];
+    }
+
+    /** Only valid after next() or skipTo() has returned true. */
+    public int doc() {
+      return base + current.doc();
+    }
+
+    /** Lazily creates the i-th sub-iterator; returns null for a null sub-set. */
+    private DocIdSetIterator iterator(int i) throws IOException {
+      DocIdSetIterator result = docIdSetsIts[i];
+      if (result == null && docIdSets[i] != null)
+        result = docIdSetsIts[i] = docIdSets[i].iterator();
+      return result;
+    }
+
+    public boolean next() throws IOException {
+      for (;;) {
+        if (current != null && current.next()) {
+          return true;
+        } else if (pointer < docIdSetsIts.length) {
+          base = starts[pointer];
+          current = iterator(pointer++);
+        } else {
+          return false;
+        }
+      }
+    }
+
+    public boolean skipTo(int target) throws IOException {
+      for (;;) {
+        // clamp at 0: a sub-set starting beyond target would otherwise be
+        // asked to skip to a negative document number
+        if (current != null && current.skipTo(Math.max(0, target - base))) {
+          return true;
+        } else if (pointer < docIdSets.length) {
+          base = starts[pointer];
+          current = iterator(pointer++);
+        } else {
+          return false;
+        }
+      }
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/search/MultiDocIdSet.java
___________________________________________________________________
Added: svn:eol-style
   + native

Index: src/java/org/apache/lucene/index/ParallelReader.java
===================================================================
--- src/java/org/apache/lucene/index/ParallelReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/ParallelReader.java (working copy)
@@ -21,6 +21,7 @@
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.FieldSelectorResult;
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.search.DocIdSet;
 
 import java.io.IOException;
 import java.util.*;
@@ -231,6 +232,13 @@
       return ((IndexReader)readers.get(0)).isDeleted(n);
     return false;
   }
+
+  /** Returns the deleted docs of the first reader, or an empty set if there are no readers. */
+  public DocIdSet getDeletedDocs() throws IOException {
+    if (readers.size() > 0)
+      return ((IndexReader)readers.get(0)).getDeletedDocs();
+    return DocIdSet.EMPTY_DOCIDSET;
+  }
 
   // delete in all readers
   protected void doDelete(int n) throws CorruptIndexException, IOException {
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -31,6 +31,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@@ -719,7 +720,14 @@
         ("attempt to access a deleted document");
     return getFieldsReader().doc(n, fieldSelector);
   }
-  
+
+  /** Returns the deleted docs of this segment, or an empty set when deletedDocs is null. */
+  public synchronized DocIdSet getDeletedDocs() throws IOException {
+    if (deletedDocs == null)
+      return DocIdSet.EMPTY_DOCIDSET;
+    return deletedDocs;
+  }
+
   public synchronized boolean isDeleted(int n) {
     return (deletedDocs != null && deletedDocs.get(n));
   }
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 738896)
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy)
@@ -20,6 +20,7 @@
 import java.io.IOException;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.search.DocIdSetIterator;
 
 class SegmentTermDocs implements TermDocs {
   protected SegmentReader parent;
@@ -27,6 +28,7 @@
   protected int count;
   protected int df;
   protected BitVector deletedDocs;
+  DocIdSetIterator deletedDocsIt;  // iterator over deletedDocs; null when deletedDocs is null
   int doc = 0;
   int freq;
 
@@ -42,12 +44,14 @@
 
   protected boolean currentFieldStoresPayloads;
   protected boolean currentFieldOmitTf;
+  int nextDeletion;  // next deleted doc at or after the cursor; MAX_VALUE when none remain
 
   protected SegmentTermDocs(SegmentReader parent) {
     this.parent = parent;
     this.freqStream = (IndexInput) parent.freqStream.clone();
     synchronized (parent) {
       this.deletedDocs = parent.deletedDocs;
+      if (deletedDocs != null) this.deletedDocsIt = deletedDocs.iterator();
     }
     this.skipInterval = parent.tis.getSkipInterval();
     this.maxSkipLevels = parent.tis.getMaxSkipLevels();
@@ -77,6 +81,7 @@
 
   void seek(TermInfo ti, Term term) throws IOException {
     count = 0;
+    nextDeletion = this.deletedDocsIt == null ? Integer.MAX_VALUE : -1;
     FieldInfo fi = parent.fieldInfos.fieldInfo(term.field);
     currentFieldOmitTf = (fi != null) ? fi.omitTf : false;
     currentFieldStoresPayloads = (fi != null) ? fi.storePayloads : false;
@@ -123,10 +128,17 @@
       }
 
       count++;
-
-      if (deletedDocs == null || !deletedDocs.get(doc))
-        break;
-      skippingDoc();
+      if (deletedDocsIt != null) {
+        if (doc > nextDeletion) {  // deletions cursor is behind doc: advance it
+          nextDeletion = deletedDocsIt.skipTo(doc)
+              ? deletedDocsIt.doc() : Integer.MAX_VALUE;  // exhausted: stop probing
+        }
+        if (doc == nextDeletion) {
+          skippingDoc();
+          continue;
+        }
+      }
+      break;
     }
     return true;
   }
@@ -148,36 +160,44 @@
         else
           freq = freqStream.readVInt();     // else read freq
         count++;
-
-        if (deletedDocs == null || !deletedDocs.get(doc)) {
-          docs[i] = doc;
-          freqs[i] = freq;
-          ++i;
+        if (deletedDocsIt != null) {
+          if (doc > nextDeletion) {  // deletions cursor is behind doc: advance it
+            nextDeletion = deletedDocsIt.skipTo(doc)
+                ? deletedDocsIt.doc() : Integer.MAX_VALUE;  // exhausted: stop probing
+          }
+          if (doc == nextDeletion)
+            continue;
         }
+        docs[i] = doc;
+        freqs[i] = freq;
+        ++i;
       }
       return i;
     }
   }
-  
+
 
   private final int readNoTf(final int[] docs, final int[] freqs, final int length) throws IOException {
     int i = 0;
     while (i < length && count < df) {
       // manually inlined call to next() for speed
       doc += freqStream.readVInt();
       count++;
-
-      if (deletedDocs == null || !deletedDocs.get(doc)) {
-        docs[i] = doc;
-        // Hardware freq to 1 when term freqs were not
-        // stored in the index
-        freqs[i] = 1;
-        ++i;
+      if (deletedDocsIt != null) {
+        if (doc > nextDeletion) {  // deletions cursor is behind doc: advance it
+          nextDeletion = deletedDocsIt.skipTo(doc)
+              ? deletedDocsIt.doc() : Integer.MAX_VALUE;  // exhausted: stop probing
+        }
+        if (doc == nextDeletion)
+          continue;
       }
+      docs[i] = doc;
+      freqs[i] = 1;  // freq is hard-wired to 1 when term freqs were not stored in the index
+      ++i;
     }
     return i;
   }
-  
+
 
   /** Overridden by SegmentTermPositions to skip in prox stream. */
   protected void skipProx(long proxPointer, int payloadLength) throws IOException {}
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -27,6 +27,8 @@
 import org.apache.lucene.index.MultiSegmentReader.MultiTermDocs;
 import org.apache.lucene.index.MultiSegmentReader.MultiTermEnum;
 import org.apache.lucene.index.MultiSegmentReader.MultiTermPositions;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.MultiDocIdSet;
 
 /** An IndexReader which reads multiple indexes, appending their content.
  *
@@ -338,6 +340,13 @@
     }
   }
 
+  /** Returns the deleted docs of all sub-readers, each shifted by its entry in starts. */
+  public DocIdSet getDeletedDocs() throws IOException {
+    ensureOpen();
+    DocIdSet[] sets = MultiSegmentReader.getDeletedDocsDocIdSets(subReaders);
+    return new MultiDocIdSet(starts, sets);
+  }
+
   public Collection getFieldNames (IndexReader.FieldOption fieldNames) {
     ensureOpen();
     return MultiSegmentReader.getFieldNames(fieldNames, this.subReaders);
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -19,6 +19,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.store.Directory;
 
 import java.io.IOException;
@@ -146,7 +147,13 @@
     ensureOpen();
     return in.document(n, fieldSelector);
   }
-  
+
+  /** Returns the deleted docs of the wrapped reader. */
+  public DocIdSet getDeletedDocs() throws IOException {
+    ensureOpen();
+    return in.getDeletedDocs();
+  }
+
   public boolean isDeleted(int n) {
     // Don't call ensureOpen() here (it could affect performance)
     return in.isDeleted(n);
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -20,6 +20,7 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.search.Similarity;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.store.*;
 
 import java.io.File;
@@ -1042,6 +1043,16 @@
   public IndexCommit getIndexCommit() throws IOException {
     throw new UnsupportedOperationException("This reader does not support this method.");
   }
+
+  /**
+   * Returns the deleted documents of this reader as a DocIdSet.
+   * This base implementation always throws; readers that support it override.
+   * @throws UnsupportedOperationException if this reader does not support this method
+   */
+  // nocommit -- must implement for impls in contrib
+  public DocIdSet getDeletedDocs() throws IOException {
+    throw new UnsupportedOperationException("This reader does not support this method.");
+  }
 
   /**
    * Prints the filename and size of each file within a given compound file.
Index: src/java/org/apache/lucene/index/MultiSegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiSegmentReader.java (revision 738896)
+++ src/java/org/apache/lucene/index/MultiSegmentReader.java (working copy)
@@ -25,6 +25,8 @@
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.MultiDocIdSet;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.store.Directory;
@@ -193,7 +195,23 @@
     }
     starts[subReaders.length] = maxDoc;
   }
-  
+
+  /** Collects getDeletedDocs() of each reader, in reader order. */
+  static DocIdSet[] getDeletedDocsDocIdSets(IndexReader[] readers) throws IOException {
+    DocIdSet[] sets = new DocIdSet[readers.length];
+    for (int x=0; x < readers.length; x++) {
+      sets[x] = readers[x].getDeletedDocs();
+    }
+    return sets;
+  }
+
+  /** Returns the deleted docs of all sub-readers, each shifted by its entry in starts. */
+  public DocIdSet getDeletedDocs() throws IOException {
+    ensureOpen();
+    DocIdSet[] sets = getDeletedDocsDocIdSets(subReaders);
+    return new MultiDocIdSet(starts, sets);
+  }
+
   protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos) throws CorruptIndexException, IOException {
     if (infos.size() == 1) {
       // The index has only one segment now, so we can't refresh the MultiSegmentReader.
Index: src/java/org/apache/lucene/util/BitVector.java
===================================================================
--- src/java/org/apache/lucene/util/BitVector.java (revision 738896)
+++ src/java/org/apache/lucene/util/BitVector.java (working copy)
@@ -19,6 +19,8 @@
 
 import java.io.IOException;
 
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -35,8 +37,7 @@
 
   @version $Id$
   */
-public final class BitVector {
-
+public final class BitVector extends DocIdSet {
   private byte[] bits;
   private int size;
   private int count = -1;
@@ -46,7 +47,68 @@
     size = n;
     bits = new byte[(size >> 3) + 1];
   }
 
+
+  /** Returns the index of the first bit that is set to true that occurs on or
+   * after the specified starting index, or -1 if there is no such bit.
+   * The starting index must not be negative.
+   */
+  public final int nextSetBit(int bit) {
+    while (bit < size) {
+      if ((bits[bit >> 3] & (1 << (bit & 7))) != 0) {
+        return bit;
+      }
+      bit++;
+    }
+    return -1;
+  }
+
+  /** Returns an iterator over the indexes of the set bits, in increasing order. */
+  public DocIdSetIterator iterator() {
+    return new BitVectorDocIdSetIterator(this);
+  }
+
+  /** Iterates over the set bits of a BitVector, reporting each index as a doc id. */
+  private static class BitVectorDocIdSetIterator extends DocIdSetIterator {
+    private int docId;
+    private final BitVector bitSet;
+
+    BitVectorDocIdSetIterator(BitVector bitSet) {
+      this.bitSet = bitSet;
+      this.docId = -1;
+    }
+
+    public final int doc() {
+      assert docId != -1;
+      return docId;
+    }
+
+    public final boolean next() {
+      // once exhausted docId is Integer.MAX_VALUE; docId + 1 would overflow
+      // to a negative index, so stay exhausted instead
+      if (docId == Integer.MAX_VALUE) {
+        return false;
+      }
+      // (docId + 1) on next line requires -1 initial value for docId:
+      return checkNextDocId(bitSet.nextSetBit(docId + 1));
+    }
+
+    public final boolean skipTo(int skipDocNr) {
+      // a negative target is clamped to 0, i.e. the search starts at the first bit
+      return checkNextDocId(bitSet.nextSetBit(Math.max(0, skipDocNr)));
+    }
+
+    private final boolean checkNextDocId(int d) {
+      if (d == -1) { // -1 returned by BitVector.nextSetBit() when exhausted
+        docId = Integer.MAX_VALUE;
+        return false;
+      } else {
+        docId = d;
+        return true;
+      }
+    }
+  }
+
   /** Sets the value of bit to one. */
   public final void set(int bit) {
     if (bit >= size) {
Index: contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
===================================================================
--- contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 738897)
+++ contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy)
@@ -45,6 +45,7 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Similarity;
 
 /**
@@ -736,6 +737,11 @@
       return (Info) sortedFields[pos].getValue();
     }
 
+    /** Always returns the empty DocIdSet. */
+    public DocIdSet getDeletedDocs() throws IOException {
+      return DocIdSet.EMPTY_DOCIDSET;
+    }
+
     public int docFreq(Term term) {
       Info info = getInfo(term.field());
       int freq = 0;