Index: lucene/CHANGES.txt
--- lucene/CHANGES.txt Sun Jan 30 12:34:56 2011 -0500
+++ lucene/CHANGES.txt Sun Jan 30 13:03:35 2011 -0500
@@ -635,6 +635,13 @@
it should keep it itself. Fixed Scorers to pass their parent Weight, so that
Scorer.visitSubScorers (LUCENE-2590) will work correctly.
(Robert Muir, Doron Cohen)
+
+* LUCENE-2900: When opening a near-real-time (NRT) reader
+ (IndexReader.re/open(IndexWriter)) you can now specify whether
+ deletes should be applied. Applying deletes can be costly, and some
+ expert use cases can handle seeing deleted documents returned. The
+ deletes remain buffered so that the next time you open an NRT reader
+ and pass true, all deletes will be applied. (Mike McCandless)
Bug fixes
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Sun Jan 30 13:03:35 2011 -0500
@@ -1277,7 +1277,7 @@
Document doc = new Document();
doc.add(newField("field", "", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
- IndexReader r = IndexReader.open(w);
+ IndexReader r = IndexReader.open(w, true);
IndexSearcher s = new IndexSearcher(r);
Query q = new StandardQueryParser(new CannedAnalyzer()).parse("\"a\"", "field");
Index: lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java
--- lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Sun Jan 30 13:03:35 2011 -0500
@@ -99,7 +99,7 @@
public void testLatLongFilterOnDeletedDocs() throws Exception {
writer.deleteDocuments(new Term("name", "Potomac"));
- IndexReader r = IndexReader.open(writer);
+ IndexReader r = IndexReader.open(writer, true);
LatLongDistanceFilter f = new LatLongDistanceFilter(new QueryWrapperFilter(new MatchAllDocsQuery()),
lat, lng, 1.0, latField, lngField);
Index: lucene/src/java/org/apache/lucene/index/DirectoryReader.java
--- lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun Jan 30 13:03:35 2011 -0500
@@ -70,6 +70,8 @@
// opened on a past IndexCommit:
private long maxIndexVersion;
+ private final boolean applyAllDeletes;
+
// static IndexReader open(final Directory directory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly,
// final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
// return open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor, null);
@@ -107,6 +109,7 @@
this.codecs = codecs;
}
readerFinishedListeners = new MapBackedSet(new ConcurrentHashMap());
+ applyAllDeletes = false;
// To reduce the chance of hitting FileNotFound
// (and having to retry), we open segments in
@@ -138,9 +141,11 @@
}
// Used by near real-time search
- DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ DirectoryReader(IndexWriter writer, SegmentInfos infos, int termInfosIndexDivisor, CodecProvider codecs, boolean applyAllDeletes) throws IOException {
this.directory = writer.getDirectory();
this.readOnly = true;
+ this.applyAllDeletes = applyAllDeletes; // saved for reopen
+
segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (codecs == null) {
@@ -193,6 +198,7 @@
this.segmentInfos = infos;
this.termInfosIndexDivisor = termInfosIndexDivisor;
this.readerFinishedListeners = readerFinishedListeners;
+ applyAllDeletes = false;
if (codecs == null) {
this.codecs = CodecProvider.getDefault();
@@ -401,7 +407,7 @@
// TODO: right now we *always* make a new reader; in
// the future we could have write make some effort to
// detect that no changes have occurred
- IndexReader reader = writer.getReader();
+ IndexReader reader = writer.getReader(applyAllDeletes);
reader.readerFinishedListeners = readerFinishedListeners;
return reader;
}
Index: lucene/src/java/org/apache/lucene/index/IndexReader.java
--- lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 30 13:03:35 2011 -0500
@@ -295,24 +295,26 @@
/**
* Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}.
*
- *
* @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
* @return The new IndexReader
* @throws CorruptIndexException
* @throws IOException if there is a low-level IO error
*
- * @see #reopen(IndexWriter)
+ * @see #reopen(IndexWriter,boolean)
*
* @lucene.experimental
*/
- public static IndexReader open(final IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public static IndexReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
-
-
-
/** Expert: returns an IndexReader reading the index in the given
* {@link IndexCommit}. You should pass readOnly=true, since it
* gives much better concurrent performance, unless you
@@ -617,18 +619,26 @@
* if you attempt to reopen any of those readers, you'll
* hit an {@link AlreadyClosedException}.
*
- * @lucene.experimental
- *
* @return IndexReader that covers entire index plus all
* changes made so far by this IndexWriter instance
*
+ * @param writer The IndexWriter to open from
+ * @param applyAllDeletes If true, all buffered deletes will
+ * be applied (made visible) in the returned reader. If
+ * false, the deletes are not applied but remain buffered
+ * (in IndexWriter) so that they will be applied in the
+ * future. Applying deletes can be costly, so if your app
+ * can tolerate deleted documents being returned you might
+ * gain some performance by passing false.
+ *
* @throws IOException
+ *
+ * @lucene.experimental
*/
- public IndexReader reopen(IndexWriter writer) throws CorruptIndexException, IOException {
- return writer.getReader();
+ public IndexReader reopen(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
+ return writer.getReader(applyAllDeletes);
}
-
/**
* Efficiently clones the IndexReader (sharing most
* internal state).
Index: lucene/src/java/org/apache/lucene/index/IndexWriter.java
--- lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/java/org/apache/lucene/index/IndexWriter.java Sun Jan 30 13:03:35 2011 -0500
@@ -274,6 +274,10 @@
// for testing
boolean anyNonBulkMerges;
+ IndexReader getReader() throws IOException {
+ return getReader(true);
+ }
+
/**
* Expert: returns a readonly reader, covering all
* committed as well as un-committed changes to the index.
@@ -333,7 +337,7 @@
*
* @throws IOException
*/
- IndexReader getReader() throws IOException {
+ IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
final long tStart = System.currentTimeMillis();
@@ -352,8 +356,8 @@
// just like we do when loading segments_N
IndexReader r;
synchronized(this) {
- flush(false, true);
- r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs);
+ flush(false, applyAllDeletes);
+ r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
if (infoStream != null) {
message("return reader version=" + r.getVersion() + " reader=" + r);
}
@@ -2463,9 +2467,9 @@
* to the Directory.
* @param triggerMerge if true, we may merge segments (if
* deletes or docs were flushed) if necessary
- * @param flushDeletes whether pending deletes should also
+ * @param applyAllDeletes whether pending deletes should also be applied
*/
- protected final void flush(boolean triggerMerge, boolean flushDeletes) throws CorruptIndexException, IOException {
+ protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
@@ -2476,7 +2480,7 @@
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
- if (doFlush(flushDeletes) && triggerMerge) {
+ if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}
Index: lucene/src/test/org/apache/lucene/TestExternalCodecs.java
--- lucene/src/test/org/apache/lucene/TestExternalCodecs.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/test/org/apache/lucene/TestExternalCodecs.java Sun Jan 30 13:03:35 2011 -0500
@@ -660,7 +660,7 @@
}
w.deleteDocuments(new Term("id", "77"));
- IndexReader r = IndexReader.open(w);
+ IndexReader r = IndexReader.open(w, true);
IndexReader[] subs = r.getSequentialSubReaders();
// test each segment
for(int i=0;i 1);
Index: lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
--- lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java Sun Jan 30 13:03:35 2011 -0500
@@ -126,7 +126,7 @@
doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
- reader = IndexReader.open(writer);
+ reader = IndexReader.open(writer, true);
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
Index: lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
--- lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Sun Jan 30 13:03:35 2011 -0500
@@ -49,7 +49,7 @@
setMergePolicy(newLogMergePolicy(false))
);
TestIndexWriterReader.createIndexNoClose(true, "ram", writer);
- IndexReader reader = IndexReader.open(writer);
+ IndexReader reader = IndexReader.open(writer, true);
assertEquals(100, reader.maxDoc());
writer.commit();
// we should see only fdx,fdt files here
Index: lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
--- lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Sun Jan 30 12:34:56 2011 -0500
+++ lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Sun Jan 30 13:03:35 2011 -0500
@@ -960,7 +960,7 @@
writer.addDocument(doc);
docCount++;
}
- IndexReader r = IndexReader.open(writer);
+ IndexReader r = IndexReader.open(writer, true);
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
Builder builder = new Builder(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java
--- modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Sun Jan 30 12:34:56 2011 -0500
+++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NearRealtimeReaderTask.java Sun Jan 30 13:03:35 2011 -0500
@@ -59,7 +59,7 @@
}
long t = System.currentTimeMillis();
- IndexReader r = IndexReader.open(w);
+ IndexReader r = IndexReader.open(w, true);
runData.setIndexReader(r);
// Transfer our reference to runData
r.decRef();