Index: lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java (revision 1137067) +++ lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java (working copy) @@ -19,12 +19,16 @@ import java.io.IOException; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.store.Directory; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.TermRangeFilter; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.OpenBitSetDISI; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; /** @@ -33,10 +37,10 @@ * sent to dir2. 
*/ public class PKIndexSplitter { - private Term midTerm; - Directory input; - Directory dir1; - Directory dir2; + private final Term midTerm; + private final Directory input; + private final Directory dir1; + private final Directory dir2; public PKIndexSplitter(Term midTerm, Directory input, Directory dir1, Directory dir2) { @@ -47,85 +51,72 @@ } public void split() throws IOException { + boolean success = false; IndexReader reader = IndexReader.open(input); - OpenBitSet lowDels = setDeletes(reader, null, midTerm.bytes()); - OpenBitSet hiDels = setDeletes(reader, midTerm.bytes(), null); - - createIndex(dir1, reader, lowDels); - createIndex(dir2, reader, hiDels); - reader.close(); + try { + Filter filter = new TermRangeFilter(midTerm.field(), null, midTerm.bytes(), true, false); + createIndex(dir1, reader, filter, false); + createIndex(dir2, reader, filter, true); + success = true; + } finally { + IOUtils.closeSafely(!success, reader); + } } - private void createIndex(Directory target, IndexReader reader, OpenBitSet bv) throws IOException { + private void createIndex(Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException { + boolean success = false; IndexWriter w = new IndexWriter(target, new IndexWriterConfig( - Version.LUCENE_CURRENT, - new WhitespaceAnalyzer(Version.LUCENE_CURRENT)) - .setOpenMode(OpenMode.CREATE)); - w.addIndexes(new DeletesIndexReader(reader, bv)); - w.close(); - } - - private OpenBitSet setDeletes(IndexReader reader, BytesRef startTerm, - BytesRef endTermExcl) throws IOException { - OpenBitSet incl = new OpenBitSet(reader.maxDoc()); - Terms terms = MultiFields.getTerms(reader, midTerm.field()); - TermsEnum te = terms.iterator(); - if (startTerm != null) { - te.seek(startTerm); + Version.LUCENE_CURRENT, null).setOpenMode(OpenMode.CREATE)); + try { + w.addIndexes(new DocumentFilteredIndexReader(reader, preserveFilter, negateFilter)); + success = true; + } finally { + IOUtils.closeSafely(!success, 
w); } - while (true) { - final BytesRef term = te.next(); - if (term == null) { - break; - } - if (endTermExcl != null && term.compareTo(endTermExcl) >= 0) { - break; - } - DocsEnum docs = MultiFields.getTermDocsEnum(reader, - MultiFields.getDeletedDocs(reader), midTerm.field(), term); - while (true) { - final int doc = docs.nextDoc(); - if (doc != DocsEnum.NO_MORE_DOCS) { - incl.set(doc); - } else break; - } - } - OpenBitSet dels = new OpenBitSet(reader.maxDoc()); - for (int x=0; x < reader.maxDoc(); x++) { - if (!incl.get(x)) { - dels.set(x); - } - } - return dels; } - - public static class DeletesIndexReader extends FilterIndexReader { - OpenBitSet readerDels; - public DeletesIndexReader(IndexReader reader, OpenBitSet deletes) { + public static class DocumentFilteredIndexReader extends FilterIndexReader { + final Bits readerDels; + final int numDocs; + + public DocumentFilteredIndexReader(IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException { super(new SlowMultiReaderWrapper(reader)); - readerDels = new OpenBitSet(reader.maxDoc()); + + final OpenBitSetDISI bits = new OpenBitSetDISI(in.maxDoc()); + final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext()); + if (docs != null) { + final DocIdSetIterator it = docs.iterator(); + if (it != null) { + bits.inPlaceOr(it); + } + } + // 'bits' ends up as the deleted-docs set (readerDels), so the sense is inverted: with negateFilter the documents matched by the filter are the ones deleted; otherwise the bits are flipped so that the non-matching documents are deleted. 
+ if (!negateFilter) { + bits.flip(0, in.maxDoc()); + } + if (in.hasDeletions()) { - final Bits oldDelBits = MultiFields.getDeletedDocs(in); + final Bits oldDelBits = in.getDeletedDocs(); assert oldDelBits != null; for (int i = 0; i < in.maxDoc(); i++) { - if (oldDelBits.get(i) || deletes.get(i)) { - readerDels.set(i); + if (oldDelBits.get(i)) { + bits.set(i); } } - } else { - readerDels = deletes; } + + this.readerDels = bits; + this.numDocs = in.maxDoc() - (int) bits.cardinality(); } @Override public int numDocs() { - return in.maxDoc() - (int)readerDels.cardinality(); + return numDocs; } @Override public boolean hasDeletions() { - return (int)readerDels.cardinality() > 0; + return (in.maxDoc() != numDocs); } @Override Index: lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java =================================================================== --- lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java (revision 1137067) +++ lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java (working copy) @@ -42,11 +42,11 @@ Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)) .setOpenMode(OpenMode.CREATE)); - for (int x=0; x < 10; x++) { + for (int x=0; x < 11; x++) { Document doc = createDocument(x, "1", 3, format); w.addDocument(doc); } - for (int x=15; x < 20; x++) { + for (int x=11; x < 20; x++) { Document doc = createDocument(x, "2", 3, format); w.addDocument(doc); } @@ -61,9 +61,12 @@ IndexReader ir1 = IndexReader.open(dir1); IndexReader ir2 = IndexReader.open(dir2); - assertEquals(10, ir1.maxDoc()); - assertEquals(4, ir2.maxDoc()); + assertEquals(11, ir1.numDocs()); + assertEquals(9, ir2.numDocs()); + assertEquals(20, ir1.numDocs() + ir2.numDocs()); + // todo: check termdocs of field "indexname" + ir1.close(); ir2.close();