Index: lucene/CHANGES.txt --- lucene/CHANGES.txt Sun Jan 30 12:34:56 2011 -0500 +++ lucene/CHANGES.txt Sun Jan 30 13:03:35 2011 -0500 @@ -635,6 +635,13 @@ it should keep it itself. Fixed Scorers to pass their parent Weight, so that Scorer.visitSubScorers (LUCENE-2590) will work correctly. (Robert Muir, Doron Cohen) + +* LUCENE-2900: When opening a near-real-time (NRT) reader + (IndexReader.re/open(IndexWriter)) you can now specify whether + deletes should be applied. Applying deletes can be costly, and some + expert use cases can handle seeing deleted documents returned. The + deletes remain buffered so that the next time you open an NRT reader + and pass true, all deletes will be applied. (Mike McCandless) Bug fixes Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Sun Jan 30 13:03:35 2011 -0500 @@ -1277,7 +1277,7 @@ Document doc = new Document(); doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED)); w.addDocument(doc); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); IndexSearcher s = new IndexSearcher(r); Query q = new StandardQueryParser(new CannedAnalyzer()).parse("\"a\"", "field"); Index: lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java --- lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Sun Jan 30 13:03:35 2011 -0500 @@ -99,7 +99,7 @@ public void testLatLongFilterOnDeletedDocs() throws Exception { writer.deleteDocuments(new Term("name", "Potomac")); - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); 
LatLongDistanceFilter f = new LatLongDistanceFilter(new QueryWrapperFilter(new MatchAllDocsQuery()), lat, lng, 1.0, latField, lngField); Index: lucene/src/java/org/apache/lucene/index/DirectoryReader.java --- lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun Jan 30 13:03:35 2011 -0500 @@ -70,6 +70,8 @@ // opened on a past IndexCommit: private long maxIndexVersion; + private final boolean applyAllDeletes; + // static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly, // final int termInfosIndexDivisor) throws CorruptIndexException, IOException { // return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null); @@ -107,6 +109,7 @@ this.codecs = codecs; } readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap()); + applyAllDeletes = false; // To reduce the chance of hitting FileNotFound // (and having to retry), we open segments in @@ -138,9 +141,11 @@ } // Used by near real-time search - DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException { + DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException { this.directory = writer.getDirectory(); this.readOnly = true; + this.applyAllDeletes = applyAllDeletes; // saved for reopen + segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW this.termInfosIndexDivisor = termInfosIndexDivisor; if (codecs == null) { @@ -193,6 +198,7 @@ this.segmentInfos = infos; this.termInfosIndexDivisor = termInfosIndexDivisor; this.readerFinishedListeners = readerFinishedListeners; + applyAllDeletes = false; if (codecs == null) { this.codecs = CodecProvider.getDefault(); @@ -401,7 +407,7 @@ // TODO: right now 
we *always* make a new reader; in // the future we could have write make some effort to // detect that no changes have occurred - IndexReader reader = writer.getReader(); + IndexReader reader = writer.getReader(applyAllDeletes); reader.readerFinishedListeners = readerFinishedListeners; return reader; } Index: lucene/src/java/org/apache/lucene/index/IndexReader.java --- lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 30 13:03:35 2011 -0500 @@ -295,24 +295,26 @@ /** * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}. * - * * @param writer The IndexWriter to open from + * @param applyAllDeletes If true, all buffered deletes will + * be applied (made visible) in the returned reader. If + * false, the deletes are not applied but remain buffered + * (in IndexWriter) so that they will be applied in the + * future. Applying deletes can be costly, so if your app + * can tolerate deleted documents being returned you might + * gain some performance by passing false. * @return The new IndexReader * @throws CorruptIndexException * @throws IOException if there is a low-level IO error * - * @see #reopen(IndexWriter) + * @see #reopen(IndexWriter,boolean) * * @lucene.experimental */ - public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException { - return writer.getReader(); + public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { + return writer.getReader(applyAllDeletes); } - - - - /** Expert: returns an IndexReader reading the index in the given * {@link IndexCommit}. You should pass readOnly=true, since it * gives much better concurrent performance, unless you @@ -617,18 +619,26 @@ * if you attempt to reopen any of those readers, you'll * hit an {@link AlreadyClosedException}.

* - * @lucene.experimental - * * @return IndexReader that covers entire index plus all * changes made so far by this IndexWriter instance * + * @param writer The IndexWriter to open from + * @param applyAllDeletes If true, all buffered deletes will + * be applied (made visible) in the returned reader. If + * false, the deletes are not applied but remain buffered + * (in IndexWriter) so that they will be applied in the + * future. Applying deletes can be costly, so if your app + * can tolerate deleted documents being returned you might + * gain some performance by passing false. + * * @throws IOException + * + * @lucene.experimental */ - public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException { - return writer.getReader(); + public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException { + return writer.getReader(applyAllDeletes); } - /** * Efficiently clones the IndexReader (sharing most * internal state). Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java --- lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Jan 30 13:03:35 2011 -0500 @@ -274,6 +274,10 @@ // for testing boolean anyNonBulkMerges; + IndexReader getReader() throws IOException { + return getReader(true); + } + /** * Expert: returns a readonly reader, covering all * committed as well as un-committed changes to the index. 
@@ -333,7 +337,7 @@ * * @throws IOException */ - IndexReader getReader() throws IOException { + IndexReader getReader(boolean applyAllDeletes) throws IOException { ensureOpen(); final long tStart = System.currentTimeMillis(); @@ -352,8 +356,8 @@ // just like we do when loading segments_N IndexReader r; synchronized(this) { - flush(false, true); - r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs); + flush(false, applyAllDeletes); + r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes); if (infoStream != null) { message("return reader version=" + r.getVersion() + " reader=" + r); } @@ -2463,9 +2467,9 @@ * to the Directory. * @param triggerMerge if true, we may merge segments (if * deletes or docs were flushed) if necessary - * @param flushDeletes whether pending deletes should also + * @param applyAllDeletes whether pending deletes should also be applied */ - protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException { + protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException { // NOTE: this method cannot be sync'd because // maybeMerge() in turn calls mergeScheduler.merge which @@ -2476,7 +2480,7 @@ // We can be called during close, when closing==true, so we must pass false to ensureOpen: ensureOpen(false); - if (doFlush(flushDeletes) && triggerMerge) { + if (doFlush(applyAllDeletes) && triggerMerge) { maybeMerge(); } } Index: lucene/src/test/org/apache/lucene/TestExternalCodecs.java --- lucene/src/test/org/apache/lucene/TestExternalCodecs.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/test/org/apache/lucene/TestExternalCodecs.java Sun Jan 30 13:03:35 2011 -0500 @@ -660,7 +660,7 @@ } w.deleteDocuments(new Term("id", "77")); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); IndexReader[] subs = r.getSequentialSubReaders(); // test each 
segment for(int i=0;i 1); Index: lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java --- lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java Sun Jan 30 13:03:35 2011 -0500 @@ -126,7 +126,7 @@ doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } - reader = IndexReader.open(writer); + reader = IndexReader.open(writer, true); writer.close(); IndexSearcher searcher = new IndexSearcher(reader); Index: lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java --- lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Sun Jan 30 13:03:35 2011 -0500 @@ -49,7 +49,7 @@ setMergePolicy(newLogMergePolicy(false)) ); TestIndexWriterReader.createIndexNoClose(true, "ram", writer); - IndexReader reader = IndexReader.open(writer); + IndexReader reader = IndexReader.open(writer, true); assertEquals(100, reader.maxDoc()); writer.commit(); // we should see only fdx,fdt files here Index: lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java --- lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Sun Jan 30 12:34:56 2011 -0500 +++ lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Sun Jan 30 13:03:35 2011 -0500 @@ -960,7 +960,7 @@ writer.addDocument(doc); docCount++; } - IndexReader r = IndexReader.open(writer); + IndexReader r = IndexReader.open(writer, true); writer.close(); final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean()); Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs); Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java --- 
modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Sun Jan 30 12:34:56 2011 -0500 +++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Sun Jan 30 13:03:35 2011 -0500 @@ -59,7 +59,7 @@ } long t = System.currentTimeMillis(); - IndexReader r = IndexReader.open(w); + IndexReader r = IndexReader.open(w, true); runData.setIndexReader(r); // Transfer our reference to runData r.decRef();