Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1367080) +++ lucene/CHANGES.txt (working copy) @@ -34,6 +34,11 @@ CharFilterFactories to the lucene-analysis module. The API is still experimental. (Chris Male, Robert Muir, Uwe Schindler) +* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader, + int docID), to attempt deletion by docID as long as the provided + reader is an NRT reader, and the segment has not yet been merged + away (Mike McCandless). + API Changes * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3. Index: lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (revision 1367080) +++ lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (working copy) @@ -23,6 +23,9 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.memory.MemoryPostingsFormat; import org.apache.lucene.document.*; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.*; import org.apache.lucene.util.*; import org.junit.Test; @@ -48,10 +51,13 @@ final int SIZE = atLeast(20); int id = 0; IndexReader r = null; + IndexSearcher s = null; final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble()))); if (VERBOSE) { System.out.println("TEST: numUpdates=" + numUpdates); } + int updateCount = 0; + // TODO: sometimes update ids not in order... 
for(int docIter=0;docIter= SIZE && random().nextInt(50) == 17) { if (r != null) { r.close(); } + final boolean applyDeletions = random().nextBoolean(); + + if (VERBOSE) { + System.out.println("TEST: reopen applyDeletions=" + applyDeletions); + } + r = w.getReader(applyDeletions); + if (applyDeletions) { + s = new IndexSearcher(r); + } else { + s = null; + } assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE); + updateCount = 0; } } Index: lucene/core/src/java/org/apache/lucene/search/NRTManager.java =================================================================== --- lucene/core/src/java/org/apache/lucene/search/NRTManager.java (revision 1367080) +++ lucene/core/src/java/org/apache/lucene/search/NRTManager.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.SegmentInfoPerCommit; import org.apache.lucene.index.IndexReader; // javadocs import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; @@ -254,6 +255,14 @@ long getAndIncrementGeneration() { return indexingGen.getAndIncrement(); } + + public long tryDeleteDocument(IndexReader reader, int docID) throws IOException { + if (writer.tryDeleteDocument(reader, docID)) { + return indexingGen.get(); + } else { + return -1; + } + } } /** Index: lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java (revision 1367080) +++ lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java (working copy) @@ -1,8 +1,5 @@ package org.apache.lucene.index; -import java.util.Collections; -import java.util.List; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -20,6 +17,9 @@ * limitations under the License. */ +import java.util.Collections; +import java.util.List; + /** * {@link IndexReaderContext} for {@link AtomicReader} instances * @lucene.experimental @@ -51,8 +51,9 @@ @Override public List<AtomicReaderContext> leaves() { - if (!isTopLevel) + if (!isTopLevel) { throw new UnsupportedOperationException("This is not a top-level context."); + } assert leaves != null; return leaves; } Index: lucene/core/src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (revision 1367080) +++ lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -1241,6 +1241,78 @@ } } + /** Expert: attempts to delete by document ID, as long as + * the provided reader is a near-real-time reader (from {@link + * DirectoryReader#open(IndexWriter,boolean)}). If the + * provided reader is an NRT reader obtained from this + * writer, and its segment has not been merged away, then + * the delete succeeds and this method returns true; else, it + * returns false and the caller must then separately delete by + * Term or Query. + * + * NOTE: this method can only delete documents + * visible to the currently open NRT reader. If you need + * to delete documents indexed after opening the NRT + * reader you must use the other deleteDocument methods + * (e.g., {@link #deleteDocuments(Term)}). 
*/ + public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException { + + final AtomicReader reader; + if (readerIn instanceof AtomicReader) { + // Reader is already atomic: use the incoming docID: + reader = (AtomicReader) readerIn; + } else { + // Composite reader: lookup sub-reader and re-base docID: + List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves(); + int subIndex = ReaderUtil.subIndex(docID, leaves); + reader = leaves.get(subIndex).reader(); + docID -= leaves.get(subIndex).docBase; + assert docID >= 0; + assert docID < reader.maxDoc(); + } + + if (!(reader instanceof SegmentReader)) { + throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders"); + } + + final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo(); + + // TODO: this is a slow linear search, but, number of + // segments should be contained unless something is + // seriously wrong w/ the index, so it should be a minor + // cost: + + if (segmentInfos.indexOf(info) != -1) { + ReadersAndLiveDocs rld = readerPool.get(info, false); + if (rld != null) { + synchronized(bufferedDeletesStream) { + rld.initWritableLiveDocs(); + if (rld.delete(docID)) { + final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); + if (fullDelCount == rld.info.info.getDocCount()) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(rld.info)) { + segmentInfos.remove(rld.info); + readerPool.drop(rld.info); + checkpoint(); + } + } + } + //System.out.println(" yes " + info.info.name + " " + docID); + return true; + } + } else { + //System.out.println(" no rld " + info.info.name + " " + docID); + } + } else { + //System.out.println(" no seg " + info.info.name + " " + docID); + } + return false; + } + /** * Deletes the document(s) 
containing any of the * terms. All given deletes are applied and flushed atomically Index: lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java =================================================================== --- lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java (revision 1367080) +++ lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java (working copy) @@ -1,7 +1,5 @@ package org.apache.lucene.index; -import java.util.List; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -19,6 +17,8 @@ * limitations under the License. */ +import java.util.List; + /** * A struct like class that represents a hierarchical relationship between * {@link IndexReader} instances.