Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 505296) +++ CHANGES.txt (working copy) @@ -102,10 +102,10 @@ their passing unit tests. (Otis Gospodnetic) -13. LUCENE-565: Added NewIndexModifier (subclass of IndexWriter) to - more efficiently handle updating documents (the "delete then add" - use case). This is intended to be an eventual replacement for the - existing IndexModifier. Added IndexWriter.flush() (renamed from +13. LUCENE-565: Added methods to IndexWriter to more efficiently + handle updating documents (the "delete then add" use case). This + is intended to be an eventual replacement for the existing + IndexModifier. Added IndexWriter.flush() (renamed from flushRamSegments()) to flush all pending updates (held in RAM), to the Directory. (Ning Li via Mike McCandless) Index: src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (revision 505296) +++ src/test/org/apache/lucene/index/TestIndexWriterMergePolicy.java (working copy) @@ -40,7 +40,7 @@ for (int i = 0; i < 100; i++) { addDoc(writer); checkInvariants(writer); - if (writer.getRAMSegmentCount() + writer.getSegmentCount() >= 18) { + if (writer.getRamSegmentCount() + writer.getSegmentCount() >= 18) { noOverMerge = true; } } @@ -178,7 +178,7 @@ int mergeFactor = writer.getMergeFactor(); int maxMergeDocs = writer.getMaxMergeDocs(); - int ramSegmentCount = writer.getRAMSegmentCount(); + int ramSegmentCount = writer.getRamSegmentCount(); assertTrue(ramSegmentCount < maxBufferedDocs); int lowerBound = -1; Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 0) +++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 0) @@ -0,0 +1,444 @@ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.MockRAMDirectory; +import org.apache.lucene.store.RAMDirectory; + +public class TestIndexWriterDelete extends TestCase { + + // test the simple case + public void testSimpleCase() throws IOException { + String[] keywords = { "1", "2" }; + String[] unindexed = { "Netherlands", "Italy" }; + String[] unstored = { "Amsterdam has lots of bridges", + "Venice has lots of canals" }; + String[] text = { "Amsterdam", "Venice" }; + + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), true); + modifier.setUseCompoundFile(true); + modifier.setMaxBufferedDeleteTerms(1); + + for (int i = 0; i < keywords.length; i++) { + Document doc = new Document(); + doc.add(new Field("id", keywords[i], Field.Store.YES, + Field.Index.UN_TOKENIZED)); + doc.add(new Field("country", unindexed[i], Field.Store.YES, + Field.Index.NO)); + doc.add(new Field("contents", unstored[i], Field.Store.NO, + Field.Index.TOKENIZED)); + doc + .add(new Field("city", text[i], Field.Store.YES, + Field.Index.TOKENIZED)); + 
modifier.addDocument(doc); + } + modifier.optimize(); + + Term term = new Term("city", "Amsterdam"); + int hitCount = getHitCount(dir, term); + assertEquals(1, hitCount); + modifier.deleteDocuments(term); + hitCount = getHitCount(dir, term); + assertEquals(0, hitCount); + + modifier.close(); + } + + // test when delete terms only apply to disk segments + public void testNonRAMDelete() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); + + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); + + assertEquals(0, modifier.getRamSegmentCount()); + assertTrue(0 < modifier.getSegmentCount()); + + IndexReader reader = IndexReader.open(dir); + assertEquals(7, reader.numDocs()); + reader.close(); + + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + + reader = IndexReader.open(dir); + assertEquals(0, reader.numDocs()); + reader.close(); + + modifier.close(); + } + + // test when delete terms only apply to ram segments + public void testRAMDeletes() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(4); + modifier.setMaxBufferedDeleteTerms(4); + + int id = 0; + int value = 100; + + addDoc(modifier, ++id, value); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + addDoc(modifier, ++id, value); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + + assertEquals(2, modifier.getNumBufferedDeleteTerms()); + assertEquals(1, modifier.getBufferedDeleteTermsSize()); + + addDoc(modifier, ++id, value); + assertEquals(0, modifier.getSegmentCount()); + modifier.flush(); + + IndexReader reader = IndexReader.open(dir); + assertEquals(1, reader.numDocs()); + + int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); + assertEquals(1, hitCount); + reader.close(); + + modifier.close(); + } + + // test when delete terms apply to both disk and ram segments + public void testBothDeletes() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(100); + modifier.setMaxBufferedDeleteTerms(100); + + int id = 0; + int value = 100; + + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + + value = 200; + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); + + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + modifier.flush(); + + IndexReader reader = IndexReader.open(dir); + assertEquals(5, reader.numDocs()); + + modifier.close(); + } + + // test that batched delete terms are flushed together + public void testBatchDeletes() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); + + int id = 0; + int value = 100; + + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); + + IndexReader reader = IndexReader.open(dir); + assertEquals(7, reader.numDocs()); + reader.close(); + + id = 0; + 
modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + + reader = IndexReader.open(dir); + assertEquals(5, reader.numDocs()); + reader.close(); + + Term[] terms = new Term[3]; + for (int i = 0; i < terms.length; i++) { + terms[i] = new Term("id", String.valueOf(++id)); + } + modifier.deleteDocuments(terms); + + reader = IndexReader.open(dir); + assertEquals(2, reader.numDocs()); + reader.close(); + + modifier.close(); + } + + private void addDoc(IndexWriter modifier, int id, int value) + throws IOException { + Document doc = new Document(); + doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); + doc.add(new Field("id", String.valueOf(id), Field.Store.YES, + Field.Index.UN_TOKENIZED)); + doc.add(new Field("value", String.valueOf(value), Field.Store.NO, + Field.Index.UN_TOKENIZED)); + modifier.addDocument(doc); + } + + private int getHitCount(Directory dir, Term term) throws IOException { + IndexSearcher searcher = new IndexSearcher(dir); + int hitCount = searcher.search(new TermQuery(term)).length(); + searcher.close(); + return hitCount; + } + + public void testDeletesOnDiskFull() throws IOException { + testOperationsOnDiskFull(false); + } + + public void testUpdatesOnDiskFull() throws IOException { + testOperationsOnDiskFull(true); + } + + /** + * Make sure if modifier tries to commit but hits disk full that modifier + * remains consistent and usable. Similar to TestIndexReader.testDiskFull(). + */ + private void testOperationsOnDiskFull(boolean updates) throws IOException { + + boolean debug = false; + Term searchTerm = new Term("content", "aaa"); + int START_COUNT = 157; + int END_COUNT = 144; + + // First build up a starting index: + RAMDirectory startDir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), + true); + for (int i = 0; i < 157; i++) { + Document d = new Document(); + d.add(new Field("id", Integer.toString(i), Field.Store.YES, + Field.Index.UN_TOKENIZED)); + d.add(new Field("content", "aaa " + i, Field.Store.NO, + Field.Index.TOKENIZED)); + writer.addDocument(d); + } + writer.close(); + + long diskUsage = startDir.sizeInBytes(); + long diskFree = diskUsage + 10; + + IOException err = null; + + boolean done = false; + + // Iterate w/ ever increasing free disk space: + while (!done) { + MockRAMDirectory dir = new MockRAMDirectory(startDir); + IndexWriter modifier = new IndexWriter(dir, + new WhitespaceAnalyzer(), false); + + modifier.setMaxBufferedDocs(1000); // use flush or close + modifier.setMaxBufferedDeleteTerms(1000); // use flush or close + + // For each disk size, first try to commit against + // dir that will hit random IOExceptions & disk + // full; after, give it infinite disk space & turn + // off random IOExceptions & retry w/ same reader: + boolean success = false; + + for (int x = 0; x < 2; x++) { + + double rate = 0.1; + double diskRatio = ((double)diskFree) / diskUsage; + long thisDiskFree; + String testName; + + if (0 == x) { + thisDiskFree = diskFree; + if (diskRatio >= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (debug) { + System.out.println("\ncycle: " + diskFree + " bytes"); + } + testName = "disk full during reader.close() @ " + thisDiskFree + + " bytes"; + } else { + thisDiskFree = 0; + rate = 0.0; + if (debug) { + System.out.println("\ncycle: same writer: unlimited disk space"); + } + testName = "reader re-use after disk full"; + } + 
+ dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate, diskFree); + + try { + if (0 == x) { + int docId = 12; + for (int i = 0; i < 13; i++) { + if (updates) { + Document d = new Document(); + d.add(new Field("id", Integer.toString(i), Field.Store.YES, + Field.Index.UN_TOKENIZED)); + d.add(new Field("content", "bbb " + i, Field.Store.NO, + Field.Index.TOKENIZED)); + modifier.updateDocument(new Term("id", Integer.toString(docId)), d); + } else { // deletes + modifier.deleteDocuments(new Term("id", Integer.toString(docId))); + // modifier.setNorm(docId, "contents", (float)2.0); + } + docId += 12; + } + } + modifier.close(); + success = true; + if (0 == x) { + done = true; + } + } + catch (IOException e) { + if (debug) { + System.out.println(" hit IOException: " + e); + } + err = e; + if (1 == x) { + e.printStackTrace(); + fail(testName + " hit IOException after disk space was freed up"); + } + } + + // Whether we succeeded or failed, check that all + // un-referenced files were in fact deleted (ie, + // we did not create garbage). Just create a + // new IndexFileDeleter, have it delete + // unreferenced files, then verify that in fact + // no files were deleted: + String[] startFiles = dir.list(); + SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + IndexFileDeleter d = new IndexFileDeleter(infos, dir); + d.findDeletableFiles(); + d.deleteFiles(); + String[] endFiles = dir.list(); + + Arrays.sort(startFiles); + Arrays.sort(endFiles); + + // for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } +} Index: src/test/org/apache/lucene/index/TestNewIndexModifierDelete.java =================================================================== --- src/test/org/apache/lucene/index/TestNewIndexModifierDelete.java (revision 505296) +++ src/test/org/apache/lucene/index/TestNewIndexModifierDelete.java (working copy) @@ -1,446 +0,0 @@ -package org.apache.lucene.index; - -import java.io.IOException; -import java.util.Arrays; - -import junit.framework.TestCase; - -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.search.Hits; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MockRAMDirectory; -import org.apache.lucene.store.RAMDirectory; - -public class TestNewIndexModifierDelete extends TestCase { - - // test the simple case - public void testSimpleCase() throws IOException { - String[] keywords = { "1", "2" }; - String[] unindexed = { "Netherlands", "Italy" }; - String[] unstored = { "Amsterdam has lots of bridges", - "Venice has lots of canals" }; - String[] text = { "Amsterdam", "Venice" }; - - Directory dir = new RAMDirectory(); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), true); - modifier.setUseCompoundFile(true); - modifier.setMaxBufferedDeleteTerms(1); - - for (int i = 0; i < keywords.length; i++) { - Document doc = new Document(); - doc.add(new Field("id", keywords[i], Field.Store.YES, - Field.Index.UN_TOKENIZED)); - doc.add(new Field("country", unindexed[i], Field.Store.YES, - Field.Index.NO)); - doc.add(new Field("contents", unstored[i], Field.Store.NO, - Field.Index.TOKENIZED)); - doc - .add(new Field("city", text[i], Field.Store.YES, - Field.Index.TOKENIZED)); - modifier.addDocument(doc); - } - modifier.optimize(); - - Term term = new Term("city", "Amsterdam"); - int hitCount = 
getHitCount(dir, term); - assertEquals(1, hitCount); - modifier.deleteDocuments(term); - hitCount = getHitCount(dir, term); - assertEquals(0, hitCount); - - modifier.close(); - } - - // test when delete terms only apply to disk segments - public void testNonRAMDelete() throws IOException { - Directory dir = new RAMDirectory(); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(2); - modifier.setMaxBufferedDeleteTerms(2); - - int id = 0; - int value = 100; - - for (int i = 0; i < 7; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); - - assertEquals(0, modifier.getRAMSegmentCount()); - assertTrue(0 < modifier.getSegmentCount()); - - IndexReader reader = IndexReader.open(dir); - assertEquals(7, reader.numDocs()); - reader.close(); - - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - - reader = IndexReader.open(dir); - assertEquals(0, reader.numDocs()); - reader.close(); - - modifier.close(); - } - - // test when delete terms only apply to ram segments - public void testRAMDeletes() throws IOException { - Directory dir = new RAMDirectory(); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(4); - modifier.setMaxBufferedDeleteTerms(4); - - int id = 0; - int value = 100; - - addDoc(modifier, ++id, value); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - addDoc(modifier, ++id, value); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - - assertEquals(2, modifier.getNumBufferedDeleteTerms()); - assertEquals(1, modifier.getBufferedDeleteTermsSize()); - - addDoc(modifier, ++id, value); - assertEquals(0, modifier.getSegmentCount()); - modifier.flush(); - - IndexReader reader = IndexReader.open(dir); - assertEquals(1, reader.numDocs()); - - int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); - assertEquals(1, hitCount); - reader.close(); - - modifier.close(); - } - - // test when delete terms apply to both disk and ram segments - public void testBothDeletes() throws IOException { - Directory dir = new RAMDirectory(); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(100); - modifier.setMaxBufferedDeleteTerms(100); - - int id = 0; - int value = 100; - - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } - - value = 200; - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); - - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - modifier.flush(); - - IndexReader reader = IndexReader.open(dir); - assertEquals(5, reader.numDocs()); - - modifier.close(); - } - - // test that batched delete terms are flushed together - public void testBatchDeletes() throws IOException { - Directory dir = new RAMDirectory(); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(2); - modifier.setMaxBufferedDeleteTerms(2); - - int id = 0; - int value = 100; - - for (int i = 0; i < 7; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); - - IndexReader reader = IndexReader.open(dir); - assertEquals(7, reader.numDocs()); - reader.close(); - - id = 0; - modifier.deleteDocuments(new Term("id", String.valueOf(++id))); - modifier.deleteDocuments(new 
Term("id", String.valueOf(++id))); - - reader = IndexReader.open(dir); - assertEquals(5, reader.numDocs()); - reader.close(); - - Term[] terms = new Term[3]; - for (int i = 0; i < terms.length; i++) { - terms[i] = new Term("id", String.valueOf(++id)); - } - modifier.deleteDocuments(terms); - - reader = IndexReader.open(dir); - assertEquals(2, reader.numDocs()); - reader.close(); - - modifier.close(); - } - - private void addDoc(NewIndexModifier modifier, int id, int value) - throws IOException { - Document doc = new Document(); - doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); - doc.add(new Field("id", String.valueOf(id), Field.Store.YES, - Field.Index.UN_TOKENIZED)); - doc.add(new Field("value", String.valueOf(value), Field.Store.NO, - Field.Index.UN_TOKENIZED)); - modifier.addDocument(doc); - } - - private int getHitCount(Directory dir, Term term) throws IOException { - IndexSearcher searcher = new IndexSearcher(dir); - int hitCount = searcher.search(new TermQuery(term)).length(); - searcher.close(); - return hitCount; - } - - public void testDeletesOnDiskFull() throws IOException { - testOperationsOnDiskFull(false); - } - - public void testUpdatesOnDiskFull() throws IOException { - testOperationsOnDiskFull(true); - } - - /** - * Make sure if modifier tries to commit but hits disk full that modifier - * remains consistent and usable. Similar to TestIndexReader.testDiskFull(). - */ - private void testOperationsOnDiskFull(boolean updates) throws IOException { - - boolean debug = false; - Term searchTerm = new Term("content", "aaa"); - int START_COUNT = 157; - int END_COUNT = 144; - - // First build up a starting index: - RAMDirectory startDir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), - true); - for (int i = 0; i < 157; i++) { - Document d = new Document(); - d.add(new Field("id", Integer.toString(i), Field.Store.YES, - Field.Index.UN_TOKENIZED)); - d.add(new Field("content", "aaa " + i, Field.Store.NO, - Field.Index.TOKENIZED)); - writer.addDocument(d); - } - writer.close(); - - long diskUsage = startDir.sizeInBytes(); - long diskFree = diskUsage + 10; - - IOException err = null; - - boolean done = false; - - // Iterate w/ ever increasing free disk space: - while (!done) { - MockRAMDirectory dir = new MockRAMDirectory(startDir); - NewIndexModifier modifier = new NewIndexModifier(dir, - new WhitespaceAnalyzer(), false); - - modifier.setMaxBufferedDocs(1000); // use flush or close - modifier.setMaxBufferedDeleteTerms(1000); // use flush or close - - // For each disk size, first try to commit against - // dir that will hit random IOExceptions & disk - // full; after, give it infinite disk space & turn - // off random IOExceptions & retry w/ same reader: - boolean success = false; - - for (int x = 0; x < 2; x++) { - - double rate = 0.1; - double diskRatio = ((double)diskFree) / diskUsage; - long thisDiskFree; - String testName; - - if (0 == x) { - thisDiskFree = diskFree; - if (diskRatio >= 2.0) { - rate /= 2; - } - if (diskRatio >= 4.0) { - rate /= 2; - } - if (diskRatio >= 6.0) { - rate = 0.0; - } - if (debug) { - System.out.println("\ncycle: " + diskFree + " bytes"); - } - testName = "disk full during reader.close() @ " + thisDiskFree - + " bytes"; - } else { - thisDiskFree = 0; - rate = 0.0; - if (debug) { - System.out.println("\ncycle: same writer: unlimited disk space"); - } - testName = "reader re-use after disk full"; - } - - dir.setMaxSizeInBytes(thisDiskFree); - dir.setRandomIOExceptionRate(rate, 
diskFree); - - try { - if (0 == x) { - int docId = 12; - for (int i = 0; i < 13; i++) { - if (updates) { - Document d = new Document(); - d.add(new Field("id", Integer.toString(i), Field.Store.YES, - Field.Index.UN_TOKENIZED)); - d.add(new Field("content", "bbb " + i, Field.Store.NO, - Field.Index.TOKENIZED)); - modifier.updateDocument( - new Term("id", Integer.toString(docId)), d); - } else { // deletes - modifier - .deleteDocuments(new Term("id", Integer.toString(docId))); - // modifier.setNorm(docId, "contents", (float)2.0); - } - docId += 12; - } - } - modifier.close(); - success = true; - if (0 == x) { - done = true; - } - } - catch (IOException e) { - if (debug) { - System.out.println(" hit IOException: " + e); - } - err = e; - if (1 == x) { - e.printStackTrace(); - fail(testName + " hit IOException after disk space was freed up"); - } - } - - // Whether we succeeded or failed, check that all - // un-referenced files were in fact deleted (ie, - // we did not create garbage). Just create a - // new IndexFileDeleter, have it delete - // unreferenced files, then verify that in fact - // no files were deleted: - String[] startFiles = dir.list(); - SegmentInfos infos = new SegmentInfos(); - infos.read(dir); - IndexFileDeleter d = new IndexFileDeleter(infos, dir); - d.findDeletableFiles(); - d.deleteFiles(); - String[] endFiles = dir.list(); - - Arrays.sort(startFiles); - Arrays.sort(endFiles); - - // for(int i=0;i 0) { - s += "\n "; - } - s += l[i]; - } - return s; - } -} Index: src/java/org/apache/lucene/index/NewIndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/NewIndexModifier.java (revision 505296) +++ src/java/org/apache/lucene/index/NewIndexModifier.java (working copy) @@ -1,294 +0,0 @@ -package org.apache.lucene.index; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.store.Directory; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map.Entry; - -/** - * NewIndexModifier extends {@link IndexWriter} so that you can not only insert - * documents but also delete documents through a single interface. Internally, - * inserts and deletes are buffered before they are flushed to disk. - *
<p>
- * Design Overview - *
<p>
- * deleteDocuments() method works by buffering terms to be deleted. Deletes are - * deferred until ram is flushed to disk, either because enough new documents or - * delete terms are buffered, or because close() or flush() is called. Using - * Java synchronization, care is taken to ensure that an interleaved sequence of - * inserts and deletes for the same document are properly serialized. - */ - -public class NewIndexModifier extends IndexWriter { - // number of ram segments a delete term applies to - private class Num { - private int num; - - Num(int num) { - this.num = num; - } - - int getNum() { - return num; - } - - void setNum(int num) { - this.num = num; - } - } - - /** - * Default value is 10. Change using {@link #setMaxBufferedDeleteTerms(int)}. - */ - public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 10; - // the max number of delete terms that can be buffered before - // they must be flushed to disk - private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; - - // to buffer delete terms in ram before they are applied - // key is delete term, value is number of ram segments the term applies to - private HashMap bufferedDeleteTerms = new HashMap(); - private int numBufferedDeleteTerms = 0; - - /** - * @see IndexWriter#IndexWriter(String, Analyzer, boolean) - */ - public NewIndexModifier(String path, Analyzer a, boolean create) - throws IOException { - super(path, a, create); - } - - /** - * @see IndexWriter#IndexWriter(File, Analyzer, boolean) - */ - public NewIndexModifier(File path, Analyzer a, boolean create) - throws IOException { - super(path, a, create); - } - - /** - * @see IndexWriter#IndexWriter(Directory, Analyzer, boolean) - */ - public NewIndexModifier(Directory d, Analyzer a, boolean create) - throws IOException { - super(d, a, create); - } - - /** - * @see IndexWriter#IndexWriter(String, Analyzer) - */ - public NewIndexModifier(String path, Analyzer a) throws IOException { - super(path, a); - } - - /** - * @see IndexWriter#IndexWriter(File, Analyzer) - */ - public NewIndexModifier(File path, Analyzer a) throws IOException { - super(path, a); - } - - /** - * @see IndexWriter#IndexWriter(Directory, Analyzer) - */ - public NewIndexModifier(Directory d, Analyzer a) throws IOException { - super(d, a); - } - - /** - * Determines the minimal number of delete terms required before the buffered - * in-memory delete terms are applied and flushed. If there are documents - * buffered in memory at the time, they are merged and a new Segment is - * created. The delete terms are applied appropriately. - *
<p>
- * The default value is 10. - * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than - * 1 - */ - public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { - if (maxBufferedDeleteTerms < 1) - throw new IllegalArgumentException("maxBufferedDeleteTerms must at least be 1"); - this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; - } - - /** - * @see #setMaxBufferedDeleteTerms - */ - public int getMaxBufferedDeleteTerms() { - return maxBufferedDeleteTerms; - } - - // for test purpose - final synchronized int getBufferedDeleteTermsSize() { - return bufferedDeleteTerms.size(); - } - - // for test purpose - final synchronized int getNumBufferedDeleteTerms() { - return numBufferedDeleteTerms; - } - - /** - * Updates a document by first deleting all documents containing - * term and then adding the new document. - */ - public void updateDocument(Term term, Document doc) throws IOException { - updateDocument(term, doc, getAnalyzer()); - } - - /** - * Updates a document by first deleting all documents containing - * term and then adding the new document. - */ - public void updateDocument(Term term, Document doc, Analyzer analyzer) - throws IOException { - SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer); - synchronized (this) { - bufferDeleteTerm(term); - ramSegmentInfos.addElement(newSegmentInfo); - maybeFlushRamSegments(); - } - } - - /** - * Deletes all documents containing term. - */ - public synchronized void deleteDocuments(Term term) throws IOException { - bufferDeleteTerm(term); - maybeFlushRamSegments(); - } - - /** - * Deletes all documents containing any of the terms. All deletes are flushed - * at the same time. - */ - public synchronized void deleteDocuments(Term[] terms) throws IOException { - for (int i = 0; i < terms.length; i++) { - bufferDeleteTerm(terms[i]); - } - maybeFlushRamSegments(); - } - - // buffer a term in bufferedDeleteTerms. 
bufferedDeleteTerms also records - // the current number of documents buffered in ram so that the delete term - // will be applied to those ram segments as well as the disk segments - private void bufferDeleteTerm(Term term) { - Num num = (Num)bufferedDeleteTerms.get(term); - if (num == null) { - bufferedDeleteTerms.put(term, new Num(getRAMSegmentCount())); - } else { - num.setNum(getRAMSegmentCount()); - } - numBufferedDeleteTerms++; - } - - // a flush is triggered if enough new documents are buffered or - // if enough delete terms are buffered - protected boolean timeToFlushRam() { - return super.timeToFlushRam() - || numBufferedDeleteTerms >= maxBufferedDeleteTerms; - } - - protected boolean anythingToFlushRam() { - return super.anythingToFlushRam() || bufferedDeleteTerms.size() > 0; - } - - protected boolean onlyRamDocsToFlush() { - return super.onlyRamDocsToFlush() && bufferedDeleteTerms.size() == 0; - } - - protected void doAfterFlushRamSegments(boolean flushedRamSegments) - throws IOException { - if (bufferedDeleteTerms.size() > 0) { - if (getInfoStream() != null) - getInfoStream().println( - "flush " + numBufferedDeleteTerms + " buffered terms on " - + segmentInfos.size() + " segments."); - - if (flushedRamSegments) { - IndexReader reader = null; - try { - reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1)); - reader.setDeleter(getDeleter()); - - // apply delete terms to the segment just flushed from ram - // apply appropriately so that a delete term is only applied to - // the documents buffered before it, not those buffered after it - applyDeletesSelectively(bufferedDeleteTerms, reader); - } finally { - if (reader != null) - reader.close(); - } - } - - int infosEnd = segmentInfos.size(); - if (flushedRamSegments) { - infosEnd--; - } - - for (int i = 0; i < infosEnd; i++) { - IndexReader reader = null; - try { - reader = SegmentReader.get(segmentInfos.info(i)); - reader.setDeleter(getDeleter()); - - // apply delete terms to disk segments - // except the one just flushed from ram - applyDeletes(bufferedDeleteTerms, reader); - } finally { - if (reader != null) - reader.close(); - } - } - - // clean up bufferedDeleteTerms - bufferedDeleteTerms.clear(); - numBufferedDeleteTerms = 0; - } - } - - // apply buffered delete terms to the segment just flushed from ram - // apply appropriately so that a delete term is only applied to - // the documents buffered before it, not those buffered after it - private final void applyDeletesSelectively(HashMap deleteTerms, - IndexReader reader) throws IOException { - Iterator iter = deleteTerms.entrySet().iterator(); - while (iter.hasNext()) { - Entry entry = (Entry)iter.next(); - Term term = (Term)entry.getKey(); - - TermDocs docs = reader.termDocs(term); - if (docs != null) { - int num = ((Num)entry.getValue()).getNum(); - try { - while (docs.next()) { - int doc = docs.doc(); - if (doc >= num) { - break; - } - reader.deleteDocument(doc); - } - } finally { - docs.close(); - } - } - } - } - - // apply buffered delete terms to disk segments - // except the one just flushed from ram - private final void applyDeletes(HashMap deleteTerms, IndexReader reader) - throws IOException { - Iterator iter = deleteTerms.entrySet().iterator(); - while (iter.hasNext()) { - Entry entry = (Entry)iter.next(); - Term term = (Term)entry.getKey(); - reader.deleteDocuments(term); - } - } -} Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- 
src/java/org/apache/lucene/index/IndexWriter.java (revision 505296) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -32,6 +32,9 @@ import java.io.PrintStream; import java.util.Vector; import java.util.HashSet; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map.Entry; /** An IndexWriter creates and maintains an index. @@ -58,7 +61,16 @@ is also thrown if an IndexReader on the same directory is used to delete documents from the index.
</p>
- @see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion +
<p>
As of 2.1, IndexWriter can now delete documents + by {@link Term} (see {@link #deleteDocuments}) and update + (delete then add) documents (see {@link #updateDocument}). + Deletes are buffered until {@link + #setMaxBufferedDeleteTerms} <code>Term</code>s have been buffered, at which + point they are flushed to the index. Note that a flush + occurs when there are enough buffered deletes or enough + added documents, whichever comes first. When a flush + occurs, both pending deletes and added documents are + flushed to the index.
</p>
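+
+ <p>A minimal usage sketch of the new calls (an editor's illustration, not
+ part of this patch; the <code>dir</code> and analyzer setup are assumed):</p>
+ <pre>
+   IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+   Document doc = new Document();
+   doc.add(new Field("id", "42", Field.Store.YES, Field.Index.UN_TOKENIZED));
+   doc.add(new Field("contents", "new text", Field.Store.NO, Field.Index.TOKENIZED));
+   writer.updateDocument(new Term("id", "42"), doc); // buffered delete-then-add
+   writer.deleteDocuments(new Term("id", "7"));      // buffered delete
+   writer.flush();  // apply all pending adds and deletes to the Directory
+   writer.close();
+ </pre>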
*/ public class IndexWriter { @@ -84,6 +96,11 @@ public final static int DEFAULT_MAX_BUFFERED_DOCS = 10; /** + * Default value is 1000. Change using {@link #setMaxBufferedDeleteTerms(int)}. + */ + public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = 1000; + + /** * Default value is {@link Integer#MAX_VALUE}. Change using {@link #setMaxMergeDocs(int)}. */ public final static int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; @@ -108,8 +125,8 @@ private HashSet protectedSegments; // segment names that should not be deleted until commit private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - protected SegmentInfos segmentInfos = new SegmentInfos(); // the segments - protected SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory + private SegmentInfos segmentInfos = new SegmentInfos(); // the segments + private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs private IndexFileDeleter deleter; @@ -117,6 +134,16 @@ private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; + // The max number of delete terms that can be buffered before + // they must be flushed to disk. + private int maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; + + // This Hashmap buffers delete terms in ram before they are applied. + // The key is delete term; the value is number of ram + // segments the term applies to. + private HashMap bufferedDeleteTerms = new HashMap(); + private int numBufferedDeleteTerms = 0; + /** Use compound file setting. Defaults to true, minimizing the number of * files used. Setting this to false may improve indexing performance, but * may also cause file handle problems. @@ -125,10 +152,6 @@ private boolean closeDir; - protected IndexFileDeleter getDeleter() { - return deleter; - } - /** Get the current setting of whether to use the compound file format. * Note that this just returns the value you set with setUseCompoundFile(boolean) * or the default. You cannot use this to query the status of an existing index. @@ -440,6 +463,28 @@ return minMergeDocs; } + /** + *
<p>
Determines the minimum number of delete terms required before the buffered + * in-memory delete terms are applied and flushed. If there are documents + * buffered in memory at the time, they are merged and a new segment is + * created.
</p>
+ + *
<p>
The default value is {@link #DEFAULT_MAX_BUFFERED_DELETE_TERMS}. + * @throws IllegalArgumentException if maxBufferedDeleteTerms is smaller than 1
</p>
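+ * <p>A hypothetical sketch (editor's illustration, not part of this patch;
+ * assumes an open writer): with the threshold set to 10, every tenth
+ * buffered delete term triggers a flush:</p>
+ * <pre>
+ *   writer.setMaxBufferedDeleteTerms(10);
+ *   for (int i = 0; i &lt; 25; i++)
+ *     writer.deleteDocuments(new Term("id", String.valueOf(i)));
+ *   // flushes occurred at the 10th and 20th delete; close() flushes the rest
+ *   writer.close();
+ * </pre>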
+ */ + public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { + if (maxBufferedDeleteTerms < 1) + throw new IllegalArgumentException("maxBufferedDeleteTerms must be at least 1"); + this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; + } + + /** + * @see #setMaxBufferedDeleteTerms + */ + public int getMaxBufferedDeleteTerms() { + return maxBufferedDeleteTerms; + } + /** Determines how often segment indices are merged by addDocument(). With * smaller values, less RAM is used while indexing, and searches on * unoptimized indices are faster, but indexing speed is slower. With larger @@ -653,21 +698,73 @@ } } - final SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer) + final private SegmentInfo buildSingleDocSegment(Document doc, Analyzer analyzer) throws IOException { DocumentWriter dw = new DocumentWriter(ramDirectory, analyzer, this); dw.setInfoStream(infoStream); - String segmentName = newRAMSegmentName(); + String segmentName = newRamSegmentName(); dw.addDocument(segmentName, doc); return new SegmentInfo(segmentName, 1, ramDirectory, false, false); } - // for test purpose - final synchronized int getRAMSegmentCount() { - return ramSegmentInfos.size(); + /** + * Deletes the document(s) containing <code>term</code>. + * @param term the term to identify the documents to be deleted + */ + public synchronized void deleteDocuments(Term term) throws IOException { + bufferDeleteTerm(term); + maybeFlushRamSegments(); } - final synchronized String newRAMSegmentName() { + /** + * Deletes the document(s) containing any of the + * <code>terms</code>. All deletes are flushed at the same time. + * @param terms array of terms to identify the documents + * to be deleted + */ + public synchronized void deleteDocuments(Term[] terms) throws IOException { + for (int i = 0; i < terms.length; i++) { + bufferDeleteTerm(terms[i]); + } + maybeFlushRamSegments(); + } + + /** + * Updates a document by first deleting the document(s) + * containing <code>term</code> and then adding the new + * document. The delete and then add are atomic as seen + * by a reader on the same index (flush may happen only after + * the add). + * @param term the term to identify the document(s) to be + * deleted + * @param doc the document to be added + */ + public void updateDocument(Term term, Document doc) throws IOException { + updateDocument(term, doc, getAnalyzer()); + } + + /** + * Updates a document by first deleting the document(s) + * containing <code>term</code> and then adding the new + * document. The delete and then add are atomic as seen + * by a reader on the same index (flush may happen only after + * the add). 
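+ * <p>For example (an illustrative call, not part of this patch):
+ * <code>writer.updateDocument(new Term("id", "42"), newDoc, analyzer)</code>
+ * buffers a delete of every document whose <code>id</code> field is
+ * <code>42</code> and adds <code>newDoc</code> in the same synchronized
+ * step, so no reader can observe the delete without the add.</p>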
+ * @param term the term to identify the document(s) to be + * deleted + * @param doc the document to be added + * @param analyzer the analyzer to use when analyzing the document + */ + public void updateDocument(Term term, Document doc, Analyzer analyzer) + throws IOException { + SegmentInfo newSegmentInfo = buildSingleDocSegment(doc, analyzer); + synchronized (this) { + bufferDeleteTerm(term); + ramSegmentInfos.addElement(newSegmentInfo); + maybeFlushRamSegments(); + } + } + + final synchronized String newRamSegmentName() { return "_ram_" + Integer.toString(ramSegmentInfos.counter++, Character.MAX_RADIX); } @@ -677,6 +774,11 @@ } // for test purpose + final synchronized int getRamSegmentCount(){ + return ramSegmentInfos.size(); + } + + // for test purpose final synchronized int getDocCount(int i) { if (i >= 0 && i < segmentInfos.size()) { return segmentInfos.info(i).docCount; @@ -1228,40 +1330,32 @@ // counts x and y, then f(x) >= f(y). // 2: The number of committed segments on the same level (f(n)) <= M. - protected boolean timeToFlushRam() { - return ramSegmentInfos.size() >= minMergeDocs; + // This is called after pending added documents have been + // flushed to the Directory but before the change is + // committed (new segments_N file written). + void doAfterFlush() + throws IOException { } - protected boolean anythingToFlushRam() { - return ramSegmentInfos.size() > 0; - } - - // true if only buffered inserts, no buffered deletes - protected boolean onlyRamDocsToFlush() { - return true; - } - - // whether the latest segment is the flushed merge of ram segments - protected void doAfterFlushRamSegments(boolean flushedRamSegments) - throws IOException { - } - protected final void maybeFlushRamSegments() throws IOException { - if (timeToFlushRam()) { + // A flush is triggered if enough new documents are buffered or + // if enough delete terms are buffered + if (ramSegmentInfos.size() >= minMergeDocs || numBufferedDeleteTerms >= maxBufferedDeleteTerms) { flushRamSegments(); } } /** Expert: Flushes all RAM-resident segments (buffered documents), then may merge segments. */ private final synchronized void flushRamSegments() throws IOException { - if (anythingToFlushRam()) { + if (ramSegmentInfos.size() > 0 || bufferedDeleteTerms.size() > 0) { mergeSegments(ramSegmentInfos, 0, ramSegmentInfos.size()); maybeMergeSegments(minMergeDocs); } } /** - * Flush all in-memory buffered updates to the Directory. + * Flush all in-memory buffered updates (adds and deletes) + * to the Directory. 
* @throws IOException */ public final synchronized void flush() throws IOException { @@ -1350,7 +1444,9 @@ private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end) throws IOException { - boolean mergeFlag = end > 0; + // We may be called solely because there are deletes + // pending, in which case doMerge is false: + boolean doMerge = end > 0; final String mergedName = newSegmentName(); SegmentMerger merger = null; @@ -1366,21 +1462,21 @@ // This is try/finally to make sure merger's readers are closed: try { - if (mergeFlag) { - if (infoStream != null) infoStream.print("merging segments"); - merger = new SegmentMerger(this, mergedName); + if (doMerge) { + if (infoStream != null) infoStream.print("merging segments"); + merger = new SegmentMerger(this, mergedName); - for (int i = minSegment; i < end; i++) { - SegmentInfo si = sourceSegments.info(i); - if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet) - merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion + for (int i = minSegment; i < end; i++) { + SegmentInfo si = sourceSegments.info(i); + if (infoStream != null) + infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); + IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet) + merger.add(reader); + if ((reader.directory() == this.directory) || // if we own the directory + (reader.directory() == this.ramDirectory)) + segmentsToDelete.addElement(reader); // queue segment for deletion + } } - } SegmentInfos rollback = null; boolean success = false; @@ -1389,40 +1485,41 @@ // if we hit exception when doing the merge: try { - if (mergeFlag) { - mergedDocCount = merger.merge(); + if (doMerge) { + mergedDocCount = merger.merge(); - if (infoStream != null) { - infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + if (infoStream != null) { + infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + } + + newSegment = new SegmentInfo(mergedName, mergedDocCount, + directory, false, true); } - newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory, false, true); - } - if (!inTransaction - && (sourceSegments != ramSegmentInfos || !onlyRamDocsToFlush())) { + && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) { // Now save the SegmentInfo instances that // we are replacing: rollback = (SegmentInfos) segmentInfos.clone(); } - if (mergeFlag) { - if (sourceSegments == ramSegmentInfos) { - segmentInfos.addElement(newSegment); - } else { - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - sourceSegments.remove(i); + if (doMerge) { + if (sourceSegments == ramSegmentInfos) { + segmentInfos.addElement(newSegment); + } else { + for (int i = end-1; i > minSegment; i--) // remove old infos & add new + sourceSegments.remove(i); - segmentInfos.set(minSegment, newSegment); + segmentInfos.set(minSegment, newSegment); + } } - } if (sourceSegments == ramSegmentInfos) { // Should not be necessary: no prior commit should // have left pending files, so just defensive: deleter.clearPendingFiles(); - doAfterFlushRamSegments(mergeFlag); + maybeApplyDeletes(doMerge); + doAfterFlush(); } if (!inTransaction) { @@ -1446,7 +1543,7 @@ // Must rollback so our state matches index: - if (sourceSegments == 
ramSegmentInfos && onlyRamDocsToFlush()) { + if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) { // Simple case: newSegment may or may not have // been added to the end of our segment infos, // so just check & remove if so: @@ -1476,21 +1573,21 @@ } } finally { // close readers before we attempt to delete now-obsolete segments - if (mergeFlag) merger.closeReaders(); + if (doMerge) merger.closeReaders(); } if (!inTransaction) { // Attempt to delete all files we just obsoleted: deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteSegments(segmentsToDelete); // delete now-unused segments - // including the old del files + // Includes the old del files deleter.commitPendingFiles(); } else { deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments } - if (useCompoundFile && mergeFlag) { + if (useCompoundFile && doMerge) { segmentsInfosFileName = nextSegmentsFileName; nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); @@ -1531,6 +1628,56 @@ return mergedDocCount; } + // Called during flush to apply any buffered deletes. + private final void maybeApplyDeletes(boolean doMerge) throws IOException { + + if (bufferedDeleteTerms.size() > 0) { + if (infoStream != null) + infoStream.println("flush " + numBufferedDeleteTerms + " buffered terms on " + + segmentInfos.size() + " segments."); + + if (doMerge) { + IndexReader reader = null; + try { + reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1)); + reader.setDeleter(deleter); + + // Apply delete terms to the segment just flushed from ram + // apply appropriately so that a delete term is only applied to + // the documents buffered before it, not those buffered after it. + applyDeletesSelectively(bufferedDeleteTerms, reader); + } finally { + if (reader != null) + reader.close(); + } + } + + int infosEnd = segmentInfos.size(); + if (doMerge) { + infosEnd--; + } + + for (int i = 0; i < infosEnd; i++) { + IndexReader reader = null; + try { + reader = SegmentReader.get(segmentInfos.info(i)); + reader.setDeleter(deleter); + + // Apply delete terms to disk segments + // except the one just flushed from ram. + applyDeletes(bufferedDeleteTerms, reader); + } finally { + if (reader != null) + reader.close(); + } + } + + // Clean up bufferedDeleteTerms. + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; + } + } + private final boolean checkNonDecreasingLevels(int start) { int lowerBound = -1; int upperBound = minMergeDocs; @@ -1548,4 +1695,83 @@ } return true; } + + // For test purposes. + final synchronized int getBufferedDeleteTermsSize() { + return bufferedDeleteTerms.size(); + } + + // For test purposes. + final synchronized int getNumBufferedDeleteTerms() { + return numBufferedDeleteTerms; + } + + // Number of ram segments a delete term applies to. + private class Num { + private int num; + + Num(int num) { + this.num = num; + } + + int getNum() { + return num; + } + + void setNum(int num) { + this.num = num; + } + } + + // Buffer a term in bufferedDeleteTerms, which records the + // current number of documents buffered in ram so that the + // delete term will be applied to those ram segments as + // well as the disk segments. 
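+ // Editor's worked example (illustrative, not part of this patch): after
+ //   addDocument(A); deleteDocuments(T); addDocument(B);
+ // where T matches both A and B, the Num recorded for T is 1 (one ram
+ // segment existed when T was buffered). On flush, the merged ram segment
+ // holds A as doc 0 and B as doc 1, and applyDeletesSelectively() deletes
+ // only docs below that Num, so A is deleted and B survives.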
+ private void bufferDeleteTerm(Term term) { + Num num = (Num) bufferedDeleteTerms.get(term); + if (num == null) { + bufferedDeleteTerms.put(term, new Num(ramSegmentInfos.size())); + } else { + num.setNum(ramSegmentInfos.size()); + } + numBufferedDeleteTerms++; + } + + // Apply buffered delete terms to the segment just flushed from ram + // apply appropriately so that a delete term is only applied to + // the documents buffered before it, not those buffered after it. + private final void applyDeletesSelectively(HashMap deleteTerms, + IndexReader reader) throws IOException { + Iterator iter = deleteTerms.entrySet().iterator(); + while (iter.hasNext()) { + Entry entry = (Entry) iter.next(); + Term term = (Term) entry.getKey(); + + TermDocs docs = reader.termDocs(term); + if (docs != null) { + int num = ((Num) entry.getValue()).getNum(); + try { + while (docs.next()) { + int doc = docs.doc(); + if (doc >= num) { + break; + } + reader.deleteDocument(doc); + } + } finally { + docs.close(); + } + } + } + } + + // Apply buffered delete terms to this reader. + private final void applyDeletes(HashMap deleteTerms, IndexReader reader) + throws IOException { + Iterator iter = deleteTerms.entrySet().iterator(); + while (iter.hasNext()) { + Entry entry = (Entry) iter.next(); + reader.deleteDocuments((Term) entry.getKey()); + } + } } Index: src/java/org/apache/lucene/index/IndexModifier.java =================================================================== --- src/java/org/apache/lucene/index/IndexModifier.java (revision 505296) +++ src/java/org/apache/lucene/index/IndexModifier.java (working copy) @@ -27,6 +27,11 @@ import java.io.PrintStream; /** + *
<p>
[Note that as of 2.1, all but one of the + * methods in this class are available via {@link + * IndexWriter}. The one method that is not available is + * {@link #deleteDocument(int)}.]
</p>
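+ * <p>A hypothetical migration sketch (editor's illustration, not part of
+ * this patch; <code>dir</code> and <code>analyzer</code> are assumed):
+ * code such as</p>
+ * <pre>
+ *   IndexModifier modifier = new IndexModifier(dir, analyzer, false);
+ *   modifier.deleteDocuments(new Term("id", "42"));
+ * </pre>
+ * <p>can now call {@link IndexWriter} directly:</p>
+ * <pre>
+ *   IndexWriter writer = new IndexWriter(dir, analyzer, false);
+ *   writer.deleteDocuments(new Term("id", "42"));
+ * </pre>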
+ * * A class to modify an index, i.e. to delete and add documents. This * class hides {@link IndexReader} and {@link IndexWriter} so that you * do not need to care about implementation details such as that adding