Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 516341) +++ CHANGES.txt (working copy) @@ -20,6 +20,15 @@ classes, package-private again (they were unnecessarily made public as part of LUCENE-701). (Mike McCandless) + 3. LUCENE-710: added optional autoCommit boolean to IndexWriter + constructors. When this is false, index changes are not committed + until the writer is closed. This gives explicit control over when + a reader will see the changes. Also added optional custom + deletion policy to explicitly control when prior commits are + removed from the index. This is intended to allow applications to + share an index over NFS by customizing when prior commits are + deleted. (Mike McCandless) + Bug fixes 1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist. (Doron Cohen) Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java =================================================================== --- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 516341) +++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy) @@ -68,7 +68,7 @@ if (realUsage > dir.maxUsedSize) { dir.maxUsedSize = realUsage; } - throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes"); + throw new IOException("fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes"); } else { super.flushBuffer(src, len); } Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 516341) +++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -25,175 +25,259 @@ "Venice has lots of canals" }; String[] text = { "Amsterdam", "Venice" }; - Directory dir = new RAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), true); - modifier.setUseCompoundFile(true); - modifier.setMaxBufferedDeleteTerms(1); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); - for (int i = 0; i < keywords.length; i++) { - Document doc = new Document(); - doc.add(new Field("id", keywords[i], Field.Store.YES, - Field.Index.UN_TOKENIZED)); - doc.add(new Field("country", unindexed[i], Field.Store.YES, - Field.Index.NO)); - doc.add(new Field("contents", unstored[i], Field.Store.NO, - Field.Index.TOKENIZED)); - doc + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer(), true); + modifier.setUseCompoundFile(true); + modifier.setMaxBufferedDeleteTerms(1); + + for (int i = 0; i < keywords.length; i++) { + Document doc = new Document(); + doc.add(new Field("id", keywords[i], Field.Store.YES, + Field.Index.UN_TOKENIZED)); + doc.add(new Field("country", unindexed[i], Field.Store.YES, + Field.Index.NO)); + doc.add(new Field("contents", unstored[i], Field.Store.NO, + Field.Index.TOKENIZED)); + doc .add(new Field("city", text[i], Field.Store.YES, - Field.Index.TOKENIZED)); - modifier.addDocument(doc); - } - modifier.optimize(); + Field.Index.TOKENIZED)); + modifier.addDocument(doc); + } + modifier.optimize(); - Term term = new Term("city", "Amsterdam"); - int hitCount = getHitCount(dir, term); - assertEquals(1, hitCount); - modifier.deleteDocuments(term); - hitCount = getHitCount(dir, term); - assertEquals(0, hitCount); + if (!autoCommit) { + modifier.close(); + } - modifier.close(); + Term term = new Term("city", "Amsterdam"); + int hitCount 
= getHitCount(dir, term); + assertEquals(1, hitCount); + if (!autoCommit) { + modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); + modifier.setUseCompoundFile(true); + } + modifier.deleteDocuments(term); + if (!autoCommit) { + modifier.close(); + } + hitCount = getHitCount(dir, term); + assertEquals(0, hitCount); + + if (autoCommit) { + modifier.close(); + } + dir.close(); + } } // test when delete terms only apply to disk segments public void testNonRAMDelete() throws IOException { - Directory dir = new RAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(2); - modifier.setMaxBufferedDeleteTerms(2); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); - int id = 0; - int value = 100; + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); - for (int i = 0; i < 7; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); + int id = 0; + int value = 100; - assertEquals(0, modifier.getRamSegmentCount()); - assertTrue(0 < modifier.getSegmentCount()); + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); - IndexReader reader = IndexReader.open(dir); - assertEquals(7, reader.numDocs()); - reader.close(); + assertEquals(0, modifier.getRamSegmentCount()); + assertTrue(0 < modifier.getSegmentCount()); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); + if (!autoCommit) { + modifier.close(); + } - reader = IndexReader.open(dir); - assertEquals(0, reader.numDocs()); - reader.close(); + IndexReader reader = IndexReader.open(dir); + assertEquals(7, reader.numDocs()); + reader.close(); - modifier.close(); + if (!autoCommit) { + modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer()); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); + } + + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + + if (!autoCommit) { + modifier.close(); + } + + reader = IndexReader.open(dir); + assertEquals(0, reader.numDocs()); + reader.close(); + if (autoCommit) { + modifier.close(); + } + dir.close(); + } } // test when delete terms only apply to ram segments public void testRAMDeletes() throws IOException { - Directory dir = new RAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(4); - modifier.setMaxBufferedDeleteTerms(4); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(4); + modifier.setMaxBufferedDeleteTerms(4); - int id = 0; - int value = 100; + int id = 0; + int value = 100; - addDoc(modifier, ++id, value); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - addDoc(modifier, ++id, value); - modifier.deleteDocuments(new Term("value", String.valueOf(value))); + addDoc(modifier, ++id, value); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); + addDoc(modifier, ++id, value); + modifier.deleteDocuments(new Term("value", String.valueOf(value))); - assertEquals(2, modifier.getNumBufferedDeleteTerms()); - assertEquals(1, 
modifier.getBufferedDeleteTermsSize()); + assertEquals(2, modifier.getNumBufferedDeleteTerms()); + assertEquals(1, modifier.getBufferedDeleteTermsSize()); - addDoc(modifier, ++id, value); - assertEquals(0, modifier.getSegmentCount()); - modifier.flush(); + addDoc(modifier, ++id, value); + assertEquals(0, modifier.getSegmentCount()); + modifier.flush(); - IndexReader reader = IndexReader.open(dir); - assertEquals(1, reader.numDocs()); + if (!autoCommit) { + modifier.close(); + } - int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); - assertEquals(1, hitCount); - reader.close(); + IndexReader reader = IndexReader.open(dir); + assertEquals(1, reader.numDocs()); - modifier.close(); + int hitCount = getHitCount(dir, new Term("id", String.valueOf(id))); + assertEquals(1, hitCount); + reader.close(); + if (autoCommit) { + modifier.close(); + } + dir.close(); + } } // test when delete terms apply to both disk and ram segments public void testBothDeletes() throws IOException { - Directory dir = new RAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(100); - modifier.setMaxBufferedDeleteTerms(100); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); - int id = 0; - int value = 100; + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(100); + modifier.setMaxBufferedDeleteTerms(100); - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } + int id = 0; + int value = 100; - value = 200; - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } - for (int i = 0; i < 5; i++) { - addDoc(modifier, ++id, value); - } - modifier.deleteDocuments(new Term("value", String.valueOf(value))); - modifier.flush(); + value = 200; + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); - IndexReader reader = IndexReader.open(dir); - assertEquals(5, reader.numDocs()); + for (int i = 0; i < 5; i++) { + addDoc(modifier, ++id, value); + } + modifier.deleteDocuments(new Term("value", String.valueOf(value))); - modifier.close(); + modifier.flush(); + if (!autoCommit) { + modifier.close(); + } + + IndexReader reader = IndexReader.open(dir); + assertEquals(5, reader.numDocs()); + if (autoCommit) { + modifier.close(); + } + } } // test that batched delete terms are flushed together public void testBatchDeletes() throws IOException { - Directory dir = new RAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), true); - modifier.setMaxBufferedDocs(2); - modifier.setMaxBufferedDeleteTerms(2); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); + Directory dir = new RAMDirectory(); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer(), true); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); - int id = 0; - int value = 100; + int id = 0; + int value = 100; - for (int i = 0; i < 7; i++) { - addDoc(modifier, ++id, value); - } - modifier.flush(); + for (int i = 0; i < 7; i++) { + addDoc(modifier, ++id, value); + } + modifier.flush(); + if (!autoCommit) { + modifier.close(); + } - IndexReader reader = IndexReader.open(dir); - assertEquals(7, reader.numDocs()); - reader.close(); + IndexReader reader = IndexReader.open(dir); + assertEquals(7, reader.numDocs()); + reader.close(); 
+ + if (!autoCommit) { + modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer()); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); + } - id = 0; - modifier.deleteDocuments(new Term("id", String.valueOf(++id))); - modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + id = 0; + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); + modifier.deleteDocuments(new Term("id", String.valueOf(++id))); - reader = IndexReader.open(dir); - assertEquals(5, reader.numDocs()); - reader.close(); + if (!autoCommit) { + modifier.close(); + } - Term[] terms = new Term[3]; - for (int i = 0; i < terms.length; i++) { - terms[i] = new Term("id", String.valueOf(++id)); - } - modifier.deleteDocuments(terms); + reader = IndexReader.open(dir); + assertEquals(5, reader.numDocs()); + reader.close(); - reader = IndexReader.open(dir); - assertEquals(2, reader.numDocs()); - reader.close(); + Term[] terms = new Term[3]; + for (int i = 0; i < terms.length; i++) { + terms[i] = new Term("id", String.valueOf(++id)); + } + if (!autoCommit) { + modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer()); + modifier.setMaxBufferedDocs(2); + modifier.setMaxBufferedDeleteTerms(2); + } + modifier.deleteDocuments(terms); + if (!autoCommit) { + modifier.close(); + } + reader = IndexReader.open(dir); + assertEquals(2, reader.numDocs()); + reader.close(); - modifier.close(); + if (autoCommit) { + modifier.close(); + } + dir.close(); + } } private void addDoc(IndexWriter modifier, int id, int value) @@ -233,201 +317,203 @@ int START_COUNT = 157; int END_COUNT = 144; - // First build up a starting index: - RAMDirectory startDir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), - true); - for (int i = 0; i < 157; i++) { - Document d = new Document(); - d.add(new Field("id", Integer.toString(i), Field.Store.YES, - Field.Index.UN_TOKENIZED)); - d.add(new Field("content", "aaa " + i, Field.Store.NO, - Field.Index.TOKENIZED)); - writer.addDocument(d); - } - writer.close(); + for(int pass=0;pass<2;pass++) { + boolean autoCommit = (0==pass); - long diskUsage = startDir.sizeInBytes(); - long diskFree = diskUsage + 10; + // First build up a starting index: + RAMDirectory startDir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(startDir, autoCommit, + new WhitespaceAnalyzer(), true); + for (int i = 0; i < 157; i++) { + Document d = new Document(); + d.add(new Field("id", Integer.toString(i), Field.Store.YES, + Field.Index.UN_TOKENIZED)); + d.add(new Field("content", "aaa " + i, Field.Store.NO, + Field.Index.TOKENIZED)); + writer.addDocument(d); + } + writer.close(); - IOException err = null; + long diskUsage = startDir.sizeInBytes(); + long diskFree = diskUsage + 10; - boolean done = false; + IOException err = null; - // Iterate w/ ever increasing free disk space: - while (!done) { - MockRAMDirectory dir = new MockRAMDirectory(startDir); - IndexWriter modifier = new IndexWriter(dir, - new WhitespaceAnalyzer(), false); + boolean done = false; - modifier.setMaxBufferedDocs(1000); // use flush or close - modifier.setMaxBufferedDeleteTerms(1000); // use flush or close + // Iterate w/ ever increasing free disk space: + while (!done) { + MockRAMDirectory dir = new MockRAMDirectory(startDir); + IndexWriter modifier = new IndexWriter(dir, autoCommit, + new WhitespaceAnalyzer()); - // For each disk size, first try to commit against - // dir that will hit random IOExceptions & disk - // full; after, give it infinite disk 
space & turn - // off random IOExceptions & retry w/ same reader: - boolean success = false; + modifier.setMaxBufferedDocs(1000); // use flush or close + modifier.setMaxBufferedDeleteTerms(1000); // use flush or close - for (int x = 0; x < 2; x++) { + // For each disk size, first try to commit against + // dir that will hit random IOExceptions & disk + // full; after, give it infinite disk space & turn + // off random IOExceptions & retry w/ same reader: + boolean success = false; - double rate = 0.1; - double diskRatio = ((double)diskFree) / diskUsage; - long thisDiskFree; - String testName; + for (int x = 0; x < 2; x++) { - if (0 == x) { - thisDiskFree = diskFree; - if (diskRatio >= 2.0) { - rate /= 2; - } - if (diskRatio >= 4.0) { - rate /= 2; - } - if (diskRatio >= 6.0) { + double rate = 0.1; + double diskRatio = ((double)diskFree) / diskUsage; + long thisDiskFree; + String testName; + + if (0 == x) { + thisDiskFree = diskFree; + if (diskRatio >= 2.0) { + rate /= 2; + } + if (diskRatio >= 4.0) { + rate /= 2; + } + if (diskRatio >= 6.0) { + rate = 0.0; + } + if (debug) { + System.out.println("\ncycle: " + diskFree + " bytes"); + } + testName = "disk full during reader.close() @ " + thisDiskFree + + " bytes"; + } else { + thisDiskFree = 0; rate = 0.0; + if (debug) { + System.out.println("\ncycle: same writer: unlimited disk space"); + } + testName = "reader re-use after disk full"; } - if (debug) { - System.out.println("\ncycle: " + diskFree + " bytes"); + + dir.setMaxSizeInBytes(thisDiskFree); + dir.setRandomIOExceptionRate(rate, diskFree); + + try { + if (0 == x) { + int docId = 12; + for (int i = 0; i < 13; i++) { + if (updates) { + Document d = new Document(); + d.add(new Field("id", Integer.toString(i), Field.Store.YES, + Field.Index.UN_TOKENIZED)); + d.add(new Field("content", "bbb " + i, Field.Store.NO, + Field.Index.TOKENIZED)); + modifier.updateDocument(new Term("id", Integer.toString(docId)), d); + } else { // deletes + modifier.deleteDocuments(new Term("id", Integer.toString(docId))); + // modifier.setNorm(docId, "contents", (float)2.0); + } + docId += 12; + } + } + modifier.close(); + success = true; + if (0 == x) { + done = true; + } } - testName = "disk full during reader.close() @ " + thisDiskFree - + " bytes"; - } else { - thisDiskFree = 0; - rate = 0.0; - if (debug) { - System.out.println("\ncycle: same writer: unlimited disk space"); + catch (IOException e) { + if (debug) { + System.out.println(" hit IOException: " + e); + } + err = e; + if (1 == x) { + e.printStackTrace(); + fail(testName + " hit IOException after disk space was freed up"); + } } - testName = "reader re-use after disk full"; - } - dir.setMaxSizeInBytes(thisDiskFree); - dir.setRandomIOExceptionRate(rate, diskFree); + // Whether we succeeded or failed, check that all + // un-referenced files were in fact deleted (ie, + // we did not create garbage). 
Just create a + // new IndexFileDeleter, have it delete + // unreferenced files, then verify that in fact + // no files were deleted: + String[] startFiles = dir.list(); + SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null); + String[] endFiles = dir.list(); - try { - if (0 == x) { - int docId = 12; - for (int i = 0; i < 13; i++) { - if (updates) { - Document d = new Document(); - d.add(new Field("id", Integer.toString(i), Field.Store.YES, - Field.Index.UN_TOKENIZED)); - d.add(new Field("content", "bbb " + i, Field.Store.NO, - Field.Index.TOKENIZED)); - modifier.updateDocument(new Term("id", Integer.toString(docId)), d); - } else { // deletes - modifier.deleteDocuments(new Term("id", Integer.toString(docId))); - // modifier.setNorm(docId, "contents", (float)2.0); - } - docId += 12; + Arrays.sort(startFiles); + Arrays.sort(endFiles); + + // for(int i=0;i last); + last = now; + } + } + + class KeepAllDeletionPolicy implements IndexDeletionPolicy { + int numOnInit; + int numOnCommit; + public void onInit(List commits) { + verifyCommitOrder(commits); + numOnInit++; + } + public void onCommit(List commits) { + verifyCommitOrder(commits); + numOnCommit++; + } + } + + /** + * This is useful for adding to a big index w/ autoCommit + * false when you know readers are not using it. + */ + class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy { + int numOnInit; + int numOnCommit; + public void onInit(List commits) { + verifyCommitOrder(commits); + numOnInit++; + // On init, delete all commit points: + Iterator it = commits.iterator(); + while(it.hasNext()) { + ((IndexCommitPoint) it.next()).delete(); + } + } + public void onCommit(List commits) { + verifyCommitOrder(commits); + int size = commits.size(); + // Delete all but last one: + for(int i=0;i 0); + + // Then simplistic check: just verify that the + // segments_N's that still exist are in fact within SECONDS + // seconds of the last one's mod time, and, that I can + // open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + + String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + long newestModTime = dir.fileModified(fileName); + + while(gen > 0) { + try { + IndexReader reader = IndexReader.open(dir); + reader.close(); + fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + long modTime = dir.fileModified(fileName); + assertTrue("commit point was older than " + SECONDS + " seconds but did not get deleted", newestModTime - modTime < (SECONDS*1000)); + } catch (IOException e) { + // OK + break; + } + + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + gen--; + } + + dir.close(); + } + + /* + * Test a silly deletion policy that keeps all commits around. 
+ */ + public void testKeepAllDeletionPolicy() throws IOException { + + for(int pass=0;pass<4;pass++) { + + boolean autoCommit = pass < 2; + boolean useCompoundFile = (pass % 2) > 0; + + KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(); + + Directory dir = new RAMDirectory(); + + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); + writer.setUseCompoundFile(useCompoundFile); + for(int i=0;i<107;i++) { + addDoc(writer); + } + writer.close(); + + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); + writer.setUseCompoundFile(useCompoundFile); + writer.optimize(); + writer.close(); + + assertEquals(2, policy.numOnInit); + if (autoCommit) { + assertTrue(policy.numOnCommit > 2); + } else { + // If we are not auto committing then there should + // be exactly 2 commits (one per close above): + assertEquals(2, policy.numOnCommit); + } + + // Simplistic check: just verify all segments_N's still + // exist, and, I can open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + while(gen > 0) { + IndexReader reader = IndexReader.open(dir); + reader.close(); + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + gen--; + + if (gen > 0) { + // Now that we've removed a commit point, which + // should have orphan'd at least one index file. + // Open & close a writer and assert that it + // actually removed something: + int preCount = dir.list().length; + writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy); + writer.close(); + int postCount = dir.list().length; + assertTrue(postCount < preCount); + } + } + + dir.close(); + } + } + + /* Test keeping NO commit points. This is a viable and + * useful case eg where you want to build a big index with + * autoCommit false and you know there are no readers. + */ + public void testKeepNoneOnInitDeletionPolicy() throws IOException { + + for(int pass=0;pass<4;pass++) { + + boolean autoCommit = pass < 2; + boolean useCompoundFile = (pass % 2) > 0; + + KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(); + + Directory dir = new RAMDirectory(); + + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); + writer.setUseCompoundFile(useCompoundFile); + for(int i=0;i<107;i++) { + addDoc(writer); + } + writer.close(); + + writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); + writer.setUseCompoundFile(useCompoundFile); + writer.optimize(); + writer.close(); + + assertEquals(2, policy.numOnInit); + if (autoCommit) { + assertTrue(policy.numOnCommit > 2); + } else { + // If we are not auto committing then there should + // be exactly 2 commits (one per close above): + assertEquals(2, policy.numOnCommit); + } + + // Simplistic check: just verify the index is in fact + // readable: + IndexReader reader = IndexReader.open(dir); + reader.close(); + + dir.close(); + } + } + + /* + * Test a deletion policy that keeps last N commits. 
+ */ + public void testKeepLastNDeletionPolicy() throws IOException { + + final int N = 5; + + for(int pass=0;pass<4;pass++) { + + boolean autoCommit = pass < 2; + boolean useCompoundFile = (pass % 2) > 0; + + Directory dir = new RAMDirectory(); + + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); + + for(int j=0;j 0); + assertEquals(N+1, policy.numOnInit); + if (autoCommit) { + assertTrue(policy.numOnCommit > 1); + } else { + assertEquals(N+1, policy.numOnCommit); + } + + // Simplistic check: just verify only the past N segments_N's still + // exist, and, I can open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + for(int i=0;i 0; + + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); + + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); + writer.setUseCompoundFile(useCompoundFile); + writer.close(); + Term searchTerm = new Term("content", "aaa"); + Query query = new TermQuery(searchTerm); + + for(int i=0;i 2*(N+2)-1); + } else { + assertEquals(2*(N+2)-1, policy.numOnCommit); + } + + IndexSearcher searcher = new IndexSearcher(dir); + Hits hits = searcher.search(query); + assertEquals(176, hits.length()); + + // Simplistic check: just verify only the past N segments_N's still + // exist, and, I can open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + + int expectedCount = 176; + + for(int i=0;i 1) { + if (i % 2 == 0) { + expectedCount += 1; + } else { + expectedCount -= 17; + } + } + assertEquals(expectedCount, hits.length()); + searcher.close(); + } + reader.close(); + if (i == N) { + fail("should have failed on commits before last 5"); + } + } catch (IOException e) { + if (i != N) { + throw e; + } + } + if (i < N) { + dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); + } + gen--; + } + + dir.close(); + } + } + + /* + * Test a deletion policy that keeps last N commits + * around, through creates. + */ + public void testKeepLastNDeletionPolicyWithCreates() throws IOException { + + final int N = 10; + + for(int pass=0;pass<4;pass++) { + + boolean autoCommit = pass < 2; + boolean useCompoundFile = (pass % 2) > 0; + + KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N); + + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); + writer.setUseCompoundFile(useCompoundFile); + writer.close(); + Term searchTerm = new Term("content", "aaa"); + Query query = new TermQuery(searchTerm); + + for(int i=0;i 3*(N+1)-1); + } else { + assertEquals(2*(N+1), policy.numOnCommit); + } + + IndexSearcher searcher = new IndexSearcher(dir); + Hits hits = searcher.search(query); + assertEquals(0, hits.length()); + + // Simplistic check: just verify only the past N segments_N's still + // exist, and, I can open a reader on each: + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + + int expectedCount = 0; + + for(int i=0;iExpert: implement this interface, and pass it to one + * of the {@link IndexWriter} or {@link IndexReader} + * constructors, to customize when "point in time" commits + * are deleted from an index. The default deletion policy + * is {@link KeepOnlyLastCommitDeletionPolicy}, which always + * removes old commits as soon as a new commit is done (this + * matches the behavior before 2.2).
</p> + * + * <p>
One expected use case for this (and the reason why it + * was first created) is to work around problems with an + * index directory accessed via filesystems like NFS because + * NFS does not provide the "delete on last close" semantics + * that Lucene's "point in time" search normally relies on. + * By implementing a custom deletion policy, such as "a + * commit is only removed once it has been stale for more + * than X minutes", you can give your readers time to + * refresh to the new commit before {@link IndexWriter} + * removes the old commits. Note that doing so will + * increase the storage requirements of the index. See LUCENE-710 + * for details.
</p> + */ + +public interface IndexDeletionPolicy { + + /** + * <p>
This is called once when a writer is first + * instantiated to give the policy a chance to remove old + * commit points.
</p> + * + * <p>
The writer locates all commits present in the index + * and calls this method. The policy may choose to delete + * commit points. To delete a commit point, call the + * {@link IndexCommitPoint#delete} method.
</p> + * + * @param commits List of {@link IndexCommitPoint}, + * sorted by age (the 0th one is the oldest commit). + */ + public void onInit(List commits) throws IOException; + + /** + * <p>
This is called each time the writer commits. This + * gives the policy a chance to remove old commit points + * with each commit.
</p> + * + * <p>
If writer has autoCommit = true then + * this method will in general be called many times during + * one instance of {@link IndexWriter}. If + * autoCommit = false then this method is + * only called once when {@link IndexWriter#close} is + * called, or not at all if the {@link IndexWriter#abort} + * is called. The policy may now choose to delete old + * commit points by calling {@link IndexCommitPoint#delete}. + * + * @param commits List of {@link IndexCommitPoint}>, + * sorted by age (the 0th one is the oldest commit). + */ + public void onCommit(List commits) throws IOException; +} Property changes on: src/java/org/apache/lucene/index/IndexDeletionPolicy.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 516341) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -220,13 +220,6 @@ return new MultiTermPositions(subReaders, starts); } - protected void setDeleter(IndexFileDeleter deleter) { - // Share deleter to our SegmentReaders: - this.deleter = deleter; - for (int i = 0; i < subReaders.length; i++) - subReaders[i].setDeleter(deleter); - } - protected void doCommit() throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(); Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 516341) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -114,7 +114,7 @@ private Directory directory; private boolean directoryOwner; private boolean closeDirectory; - protected IndexFileDeleter deleter; + private IndexDeletionPolicy deletionPolicy; private boolean isClosed; private SegmentInfos segmentInfos; @@ -131,30 +131,44 @@ path. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error - */ + * @param path the path to the index directory */ public static IndexReader open(String path) throws CorruptIndexException, IOException { - return open(FSDirectory.getDirectory(path), true); + return open(FSDirectory.getDirectory(path), true, null); } /** Returns an IndexReader reading the index in an FSDirectory in the named - path. + * path. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error - */ + * @param path the path to the index directory */ public static IndexReader open(File path) throws CorruptIndexException, IOException { - return open(FSDirectory.getDirectory(path), true); + return open(FSDirectory.getDirectory(path), true, null); } /** Returns an IndexReader reading the index in the given Directory. * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error + * @param directory the index directory */ public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, false); + return open(directory, false, null); } - private static IndexReader open(final Directory directory, final boolean closeDirectory) throws CorruptIndexException, IOException { + /** Expert: returns an IndexReader reading the index in the given + * Directory, with a custom {@link IndexDeletionPolicy}. 
+ * @param directory the index directory + * @param deletionPolicy a custom deletion policy (only used + * if you use this reader to perform deletes or to set + * norms); see {@link IndexWriter} for details. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + */ + public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { + return open(directory, false, deletionPolicy); + } + private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { + return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) { protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException { @@ -162,8 +176,10 @@ SegmentInfos infos = new SegmentInfos(); infos.read(directory, segmentFileName); + IndexReader reader; + if (infos.size() == 1) { // index is optimized - return SegmentReader.get(infos, infos.info(0), closeDirectory); + reader = SegmentReader.get(infos, infos.info(0), closeDirectory); } else { // To reduce the chance of hitting FileNotFound @@ -184,8 +200,10 @@ } } - return new MultiReader(directory, infos, closeDirectory, readers); + reader = new MultiReader(directory, infos, closeDirectory, readers); } + reader.deletionPolicy = deletionPolicy; + return reader; } }.run(); } @@ -715,20 +733,14 @@ */ protected final synchronized void commit() throws IOException { if(hasChanges){ - if (deleter == null) { - // In the MultiReader case, we share this deleter - // across all SegmentReaders: - setDeleter(new IndexFileDeleter(segmentInfos, directory)); - } if(directoryOwner){ - // Should not be necessary: no prior commit should - // have left pending files, so just defensive: - deleter.clearPendingFiles(); + // Default deleter (for backwards compatibility) is + // KeepOnlyLastCommitDeleter: + IndexFileDeleter deleter = new IndexFileDeleter(directory, + deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, + segmentInfos, null); - String oldInfoFileName = segmentInfos.getCurrentSegmentFileName(); - String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - // Checkpoint the state we are about to change, in // case we have to roll back: startCommit(); @@ -749,24 +761,16 @@ // actually in the index): rollbackCommit(); - // Erase any pending files that we were going to delete: - deleter.clearPendingFiles(); - - // Remove possibly partially written next - // segments file: - deleter.deleteFile(nextSegmentsFileName); - // Recompute deletable files & remove them (so // partially written .del files, etc, are // removed): - deleter.findDeletableFiles(); - deleter.deleteFiles(); + deleter.refresh(); } } - // Attempt to delete all files we just obsoleted: - deleter.deleteFile(oldInfoFileName); - deleter.commitPendingFiles(); + // Have the deleter remove any now unreferenced + // files due to this commit: + deleter.checkpoint(segmentInfos, true); if (writeLock != null) { writeLock.release(); // release write lock @@ -779,13 +783,6 @@ hasChanges = false; } - protected void setDeleter(IndexFileDeleter deleter) { - this.deleter = deleter; - } - protected IndexFileDeleter getDeleter() { - return deleter; - } - /** Implements commit. 
*/ protected abstract void doCommit() throws IOException; Index: src/java/org/apache/lucene/index/IndexCommitPoint.java =================================================================== --- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0) +++ src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0) @@ -0,0 +1,41 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Represents a single commit into an index as seen by the + * {@link IndexDeletionPolicy}. + */ + +public interface IndexCommitPoint { + + /** + * Get the segments file (ie, segments_N) of + * this commit point. + */ + public String getSegmentsFileName(); + + /** + * Notify the writer that this commit point should be + * deleted. This should only be called by the {@link + * IndexDeletionPolicy} during its {@link + * IndexDeletionPolicy#onInit} or {@link + * IndexDeletionPolicy#onCommit} method. + */ + public void delete(); +} Property changes on: src/java/org/apache/lucene/index/IndexCommitPoint.java ___________________________________________________________________ Name: svn:eol-style + native Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 516341) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -37,7 +37,19 @@ /** Extension of norms file */ static final String NORMS_EXTENSION = "nrm"; - + + /** Extension of compound file */ + static final String COMPOUND_FILE_EXTENSION = "cfs"; + + /** Extension of deletes */ + static final String DELETES_EXTENSION = "del"; + + /** Extension of single norms */ + static final String SINGLE_NORMS_EXTENSION = "f"; + + /** Extension of separate norms */ + static final String SEPARATE_NORMS_EXTENSION = "s"; + /** * This array contains all filename extensions used by * Lucene's index files, with two exceptions, namely the @@ -50,6 +62,13 @@ "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "gen", "nrm" }; + + /** File extensions that are added to a compound file + * (same as above, minus "del", "gen", "cfs"). 
*/ + static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] { + "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", + "tvx", "tvd", "tvf", "nrm" + }; /** File extensions of old-style index files */ static final String COMPOUND_EXTENSIONS[] = new String[] { Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 516341) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -88,16 +88,9 @@ for (int i = 0; i < files.length; i++) { String file = files[i]; if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) { - if (file.equals(IndexFileNames.SEGMENTS)) { - // Pre lock-less commits: - if (max == -1) { - max = 0; - } - } else { - long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX); - if (v > max) { - max = v; - } + long gen = generationFromSegmentsFileName(file); + if (gen > max) { + max = gen; } } } @@ -152,6 +145,22 @@ } /** + * Parse the generation off the segments file name and + * return it. + */ + public static long generationFromSegmentsFileName(String fileName) { + if (fileName.equals(IndexFileNames.SEGMENTS)) { + return 0; + } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) { + return Long.parseLong(fileName.substring(1+IndexFileNames.SEGMENTS.length()), + Character.MAX_RADIX); + } else { + throw new IllegalArgumentException("fileName \"" + fileName + "\" is not a segments file"); + } + } + + + /** * Get the next segments_N filename that will be written. */ public String getNextSegmentFileName() { @@ -181,12 +190,8 @@ IndexInput input = directory.openInput(segmentFileName); - if (segmentFileName.equals(IndexFileNames.SEGMENTS)) { - generation = 0; - } else { - generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), - Character.MAX_RADIX); - } + generation = generationFromSegmentsFileName(segmentFileName); + lastGeneration = generation; try { @@ -255,6 +260,8 @@ IndexOutput output = directory.createOutput(segmentFileName); + boolean success = false; + try { output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT output.writeLong(++version); // every write changes @@ -266,7 +273,16 @@ } } finally { - output.close(); + try { + output.close(); + success = true; + } finally { + if (!success) { + // Try not to leave a truncated segments_N file in + // the index: + directory.deleteFile(segmentFileName); + } + } } try { @@ -305,6 +321,9 @@ public long getVersion() { return version; } + public long getGeneration() { + return generation; + } /** * Current version number from segments file. Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 516341) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -29,48 +29,100 @@ import java.io.File; import java.io.IOException; import java.io.PrintStream; -import java.util.Vector; -import java.util.HashSet; +import java.util.ArrayList; +import java.util.List; import java.util.HashMap; import java.util.Iterator; import java.util.Map.Entry; /** - An IndexWriter creates and maintains an index. + An IndexWriter creates and maintains an index. -
<p>The third argument (create) to the + <p>
The create argument to the constructor determines whether a new index is created, or whether an existing index is - opened for the addition of new documents. Note that you - can open an index with create=true even while readers are + opened. Note that you + can open an index with create=true even while readers are using the index. The old readers will continue to search the "point in time" snapshot they had opened, and won't - see the newly created index until they re-open.
</p> + see the newly created index until they re-open. There are + also constructors + with no create argument which + will create a new index if there is not already an index at the + provided path and otherwise open the existing index.</p> - <p>
In either case, documents are added with the addDocument method. - When finished adding documents, close should be called.
</p> + <p>
In either case, documents are added with addDocument + and removed with deleteDocuments. + A document can be updated with updateDocument + (which just deletes and then adds). When finished adding, deleting and updating documents, close should be called.
</p> + <p>
These changes are buffered in memory and periodically + flushed to the {@link Directory} (during the above method calls). A flush is triggered when there are + enough buffered deletes (see {@link + #setMaxBufferedDeleteTerms}) or enough added documents + (see {@link #setMaxBufferedDocs}) since the last flush, + whichever is sooner. When a flush occurs, both pending + deletes and added documents are flushed to the index. A + flush may also trigger one or more segment merges.
</p> + + + <p>
The optional autoCommit argument to the + constructors + controls visibility of the changes to {@link IndexReader} instances reading the same index. + When this is false, changes are not + visible until {@link #close()} is called. + Note that changes will still be flushed to the + {@link org.apache.lucene.store.Directory} as new files, + but are not committed (no new segments_N file + is written referencing the new files) until {@link #close} is + called. If something goes terribly wrong (for example the + JVM crashes) before {@link #close()}, then + the index will reflect none of the changes made (it will + remain in its starting state). + You can also call {@link #abort()}, which closes the writer without committing any + changes, and removes any index + files that had been flushed but are now unreferenced. + This mode is useful for preventing readers from refreshing + at a bad time (for example after you've done all your + deletes but before you've done your adds). + It can also be used to implement simple single-writer + transactional semantics ("all or none").
</p> + + <p>
When autoCommit is true then + every flush is also a commit ({@link IndexReader} + instances will see each flush as changes to the index). + This is the default, to match the behavior before 2.2. + When running in this mode, be careful not to refresh your + readers while optimize or segment merges are taking place + as this can tie up substantial disk space.
</p> + <p>
If an index will not have more documents added for a while and optimal search performance is desired, then the optimize method should be called before the index is closed.
</p> - - <p>
Opening an IndexWriter creates a lock file for the directory in use. Trying to open - another IndexWriter on the same directory will lead to a + +
<p>
Opening an IndexWriter creates a lock file for the directory in use. Trying to open + another IndexWriter on the same directory will lead to a {@link LockObtainFailedException}. The {@link LockObtainFailedException} is also thrown if an IndexReader on the same directory is used to delete documents from the index.
</p> - <p>
As of 2.1, IndexWriter can now delete documents - by {@link Term} (see {@link #deleteDocuments} ) and update - (delete then add) documents (see {@link #updateDocument}). - Deletes are buffered until {@link - #setMaxBufferedDeleteTerms} Terms at which - point they are flushed to the index. Note that a flush - occurs when there are enough buffered deletes or enough - added documents, whichever is sooner. When a flush - occurs, both pending deletes and added documents are - flushed to the index.
</p> + + <p>
Expert: IndexWriter allows an optional + {@link IndexDeletionPolicy} implementation to be + specified. You can use this to control when prior commits + are deleted from the index. The default policy is {@link + KeepOnlyLastCommitDeletionPolicy} which removes all prior + commits as soon as a new commit is done (this matches + behavior before 2.2). Creating your own policy can allow + you to explicitly keep previous "point in time" commits + alive in the index for some time, to allow readers to + refresh to the new commit without having the old commit + deleted out from under them. This is necessary on + filesystems like NFS that do not support "delete on last + close" semantics, which Lucene's "point in time" search + normally relies on.
</p>
*/ public class IndexWriter { @@ -83,6 +135,9 @@ private long writeLockTimeout = WRITE_LOCK_TIMEOUT; + /** + * Name of the write lock in the index. + */ public static final String WRITE_LOCK_NAME = "write.lock"; /** @@ -120,11 +175,13 @@ private Similarity similarity = Similarity.getDefault(); // how to normalize - private boolean inTransaction = false; // true iff we are in a transaction private boolean commitPending; // true if segmentInfos has changes not yet committed - private HashSet protectedSegments; // segment names that should not be deleted until commit private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails + private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails + private boolean localAutoCommit; // saved autoCommit during local transaction + private boolean autoCommit = true; // false if we should commit only on close + SegmentInfos segmentInfos = new SegmentInfos(); // the segments SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private final RAMDirectory ramDirectory = new RAMDirectory(); // for temp segs @@ -238,7 +295,7 @@ */ public IndexWriter(String path, Analyzer a, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException { - init(path, a, create); + init(FSDirectory.getDirectory(path), a, create, true, null, true); } /** @@ -263,7 +320,7 @@ */ public IndexWriter(File path, Analyzer a, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException { - init(path, a, create); + init(FSDirectory.getDirectory(path), a, create, true, null, true); } /** @@ -288,14 +345,14 @@ */ public IndexWriter(Directory d, Analyzer a, boolean create) throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false); + init(d, a, create, false, null, true); } /** * Constructs an IndexWriter for the index in - * path, creating it first if it does not - * already exist, otherwise appending to the existing - * index. Text will be analyzed with a. + * path, first creating it if it does not + * already exist. Text will be analyzed with + * a. * * @param path the path to the index directory * @param a the analyzer to use @@ -309,18 +366,13 @@ */ public IndexWriter(String path, Analyzer a) throws CorruptIndexException, LockObtainFailedException, IOException { - if (IndexReader.indexExists(path)) { - init(path, a, false); - } else { - init(path, a, true); - } + init(FSDirectory.getDirectory(path), a, true, null, true); } /** * Constructs an IndexWriter for the index in - * path, creating it first if it does not - * already exist, otherwise appending to the existing - * index. Text will be analyzed with + * path, first creating it if it does not + * already exist. Text will be analyzed with * a. * * @param path the path to the index directory @@ -335,18 +387,14 @@ */ public IndexWriter(File path, Analyzer a) throws CorruptIndexException, LockObtainFailedException, IOException { - if (IndexReader.indexExists(path)) { - init(path, a, false); - } else { - init(path, a, true); - } + init(FSDirectory.getDirectory(path), a, true, null, true); } /** * Constructs an IndexWriter for the index in - * d, creating it first if it does not - * already exist, otherwise appending to the existing - * index. Text will be analyzed with a. + * d, first creating it if it does not + * already exist. Text will be analyzed with + * a. 
* * @param d the index directory * @param a the analyzer to use @@ -360,28 +408,124 @@ */ public IndexWriter(Directory d, Analyzer a) throws CorruptIndexException, LockObtainFailedException, IOException { - if (IndexReader.indexExists(d)) { - init(d, a, false, false); - } else { - init(d, a, true, false); - } + init(d, a, false, null, true); } - private void init(String path, Analyzer a, final boolean create) + /** + * Constructs an IndexWriter for the index in + * d, first creating it if it does not + * already exist. Text will be analyzed with + * a. + * + * @param d the index directory + * @param autoCommit see above + * @param a the analyzer to use + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be + * read/written to or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, boolean autoCommit, Analyzer a) throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true); + init(d, a, false, null, autoCommit); } - private void init(File path, Analyzer a, final boolean create) + /** + * Constructs an IndexWriter for the index in d. + * Text will be analyzed with a. If create + * is true, then a new, empty index will be created in + * d, replacing the index already there, if any. + * + * @param d the index directory + * @param autoCommit see above + * @param a the analyzer to use + * @param create true to create the index or overwrite + * the existing one; false to append to the existing + * index + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be read/written to, or + * if it does not exist and create is + * false or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, create, false, null, autoCommit); + } + + /** + * Expert: constructs an IndexWriter with a custom {@link + * IndexDeletionPolicy}, for the index in d, + * first creating it if it does not already exist. Text + * will be analyzed with a. + * + * @param d the index directory + * @param autoCommit see above + * @param a the analyzer to use + * @param deletionPolicy see above + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be + * read/written to or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true); + init(d, a, false, deletionPolicy, autoCommit); } - private void init(Directory d, Analyzer a, final boolean create, boolean closeDir) + /** + * Expert: constructs an IndexWriter with a custom {@link + * IndexDeletionPolicy}, for the index in d. + * Text will be analyzed with a. If + * create is true, then a new, empty index + * will be created in d, replacing the index + * already there, if any. 
+ * + * @param d the index directory + * @param autoCommit see above + * @param a the analyzer to use + * @param create true to create the index or overwrite + * the existing one; false to append to the existing + * index + * @param deletionPolicy see above + * @throws CorruptIndexException if the index is corrupt + * @throws LockObtainFailedException if another writer + * has this index open (write.lock could not + * be obtained) + * @throws IOException if the directory cannot be read/written to, or + * if it does not exist and create is + * false or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy) + throws CorruptIndexException, LockObtainFailedException, IOException { + init(d, a, create, false, deletionPolicy, autoCommit); + } + + private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit) throws CorruptIndexException, LockObtainFailedException, IOException { + if (IndexReader.indexExists(d)) { + init(d, a, false, closeDir, deletionPolicy, autoCommit); + } else { + init(d, a, true, closeDir, deletionPolicy, autoCommit); + } + } + + private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit) + throws CorruptIndexException, LockObtainFailedException, IOException { this.closeDir = closeDir; directory = d; analyzer = a; + this.infoStream = defaultInfoStream; if (create) { // Clear the write lock in case it's leftover: @@ -410,13 +554,17 @@ segmentInfos.read(directory); } - // Create a deleter to keep track of which files can - // be deleted: - deleter = new IndexFileDeleter(segmentInfos, directory); - deleter.setInfoStream(infoStream); - deleter.findDeletableFiles(); - deleter.deleteFiles(); + this.autoCommit = autoCommit; + if (!autoCommit) { + rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); + } + // Default deleter (for backwards compatibility) is + // KeepOnlyLastCommitDeleter: + deleter = new IndexFileDeleter(directory, + deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy, + segmentInfos, infoStream); + } catch (IOException e) { this.writeLock.release(); this.writeLock = null; @@ -533,11 +681,28 @@ return mergeFactor; } - /** If non-null, information about merges and a message when - * maxFieldLength is reached will be printed to this. + /** If non-null, this will be the default infoStream used + * by a newly instantiated IndexWriter. + * @see #setInfoStream */ + public static void setDefaultInfoStream(PrintStream infoStream) { + IndexWriter.defaultInfoStream = infoStream; + } + + /** + * @see #setDefaultInfoStream + */ + public static PrintStream getDefaultInfoStream() { + return IndexWriter.defaultInfoStream; + } + + /** If non-null, information about merges, deletes and a + * message when maxFieldLength is reached will be printed + * to this. 
+ */ public void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; + deleter.setInfoStream(infoStream); } /** @@ -613,6 +778,14 @@ */ public synchronized void close() throws CorruptIndexException, IOException { flushRamSegments(); + + if (commitPending) { + segmentInfos.write(directory); // now commit changes + deleter.checkpoint(segmentInfos, true); + commitPending = false; + rollbackSegmentInfos = null; + } + ramDirectory.close(); if (writeLock != null) { writeLock.release(); // release write lock @@ -737,7 +910,9 @@ dw.setInfoStream(infoStream); String segmentName = newRamSegmentName(); dw.addDocument(segmentName, doc); - return new SegmentInfo(segmentName, 1, ramDirectory, false, false); + SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false); + si.setNumFields(dw.getNumFields()); + return si; } /** @@ -871,6 +1046,7 @@ */ private PrintStream infoStream = null; + private static PrintStream defaultInfoStream = null; /** Merges all segments together into a single segment, * optimizing an index for search. @@ -949,21 +1125,18 @@ * merges that happen (or ram segments flushed) will not * write a new segments file and will not remove any files * that were present at the start of the transaction. You - * must make a matched (try/finall) call to + * must make a matched (try/finally) call to * commitTransaction() or rollbackTransaction() to finish * the transaction. */ private void startTransaction() throws IOException { - if (inTransaction) { - throw new IOException("transaction is already in process"); + localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); + localAutoCommit = autoCommit; + if (localAutoCommit) { + flushRamSegments(); + // Turn off auto-commit during our local transaction: + autoCommit = false; } - rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); - protectedSegments = new HashSet(); - for(int i=0;iIndexWriter without committing + * any of the changes that have occurred since it was + * opened. This removes any temporary files that had been + * created, after which the state of the index will be the + * same as it was when this writer was first opened. This + * can only be called when this IndexWriter was opened + * with autoCommit=false. + * @throws IllegalStateException if this is called when + * the writer was opened with autoCommit=true. + * @throws IOException if there is a low-level IO error + */ + public void abort() throws IOException { + if (!autoCommit) { + // Keep the same segmentInfos instance but replace all + // of its SegmentInfo instances. This is so the next + // attempt to commit using this instance of IndexWriter + // will always write to a new generation ("write once"). + segmentInfos.clear(); + segmentInfos.addAll(rollbackSegmentInfos); + // Ask deleter to locate unreferenced files & remove + // them: + deleter.checkpoint(segmentInfos, false); + deleter.refresh(); + ramSegmentInfos = new SegmentInfos(); + bufferedDeleteTerms.clear(); + numBufferedDeleteTerms = 0; + + commitPending = false; + close(); + + } else { + throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false"); + } + } + + /* + * Called whenever the SegmentInfos has been updated and + * the index files referenced exist (correctly) in the + * index directory. If we are in autoCommit mode, we + * commit the change immediately. Else, we mark + * commitPending. 
+ */ + private void checkpoint() throws IOException { + if (autoCommit) { + segmentInfos.write(directory); + } else { + commitPending = true; + } + } + /** Merges all segments from an array of indexes into this index. * *
<p>
This may be used to parallelize batch indexing. A large document @@ -1266,16 +1484,13 @@ final String mergedName = newSegmentName(); SegmentMerger merger = new SegmentMerger(this, mergedName); - final Vector segmentsToDelete = new Vector(); SegmentInfo info; - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); IndexReader sReader = null; try { if (segmentInfos.size() == 1){ // add existing index, if any sReader = SegmentReader.get(segmentInfos.info(0)); merger.add(sReader); - segmentsToDelete.addElement(sReader); // queue segment for deletion } for (int i = 0; i < readers.length; i++) // add new indexes @@ -1288,16 +1503,15 @@ try { int docCount = merger.merge(); // merge 'em - segmentInfos.setSize(0); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true); - segmentInfos.addElement(info); - commitPending = true; - if(sReader != null) { sReader.close(); sReader = null; } + segmentInfos.setSize(0); // pop old infos & add new + info = new SegmentInfo(mergedName, docCount, directory, false, true); + segmentInfos.addElement(info); + success = true; } finally { @@ -1312,26 +1526,16 @@ sReader.close(); } } + + if (useCompoundFile) { - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments - - if (useCompoundFile) { boolean success = false; - segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - Vector filesToDelete; - startTransaction(); try { - - filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); - + merger.createCompoundFile(mergedName + ".cfs"); info.setUseCompoundFile(true); - commitPending = true; - success = true; - } finally { if (!success) { rollbackTransaction(); @@ -1339,9 +1543,6 @@ commitTransaction(); } } - - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteFiles(filesToDelete); // delete now unused files of segment } } @@ -1500,14 +1701,12 @@ final String mergedName = newSegmentName(); SegmentMerger merger = null; - final Vector segmentsToDelete = new Vector(); + final List ramSegmentsToDelete = new ArrayList(); - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - SegmentInfo newSegment = null; int mergedDocCount = 0; + boolean anyDeletes = (bufferedDeleteTerms.size() != 0); // This is try/finally to make sure merger's readers are closed: try { @@ -1522,9 +1721,9 @@ infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet) merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion + if (reader.directory() == this.ramDirectory) { + ramSegmentsToDelete.add(si); + } } } @@ -1545,9 +1744,8 @@ newSegment = new SegmentInfo(mergedName, mergedDocCount, directory, false, true); } - - if (!inTransaction - && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) { + + if (sourceSegments != ramSegmentInfos || anyDeletes) { // Now save the SegmentInfo instances that // we are replacing: rollback = (SegmentInfos) segmentInfos.clone(); @@ -1565,19 +1763,12 @@ } if (sourceSegments == ramSegmentInfos) { - // Should not be necessary: no prior commit should - // have left pending files, so just defensive: - deleter.clearPendingFiles(); 
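The addIndexes javadoc above ("This may be used to parallelize batch indexing...") is easiest to see in code. A sketch, not part of the patch, with RAMDirectory and the per-collection indexing loops elided as placeholders:

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class ParallelBatchExample {
      public static void main(String[] args) throws Exception {
        // Build each sub-collection into its own index, possibly on
        // separate machines or threads:
        Directory[] subIndexes = new Directory[] { new RAMDirectory(), new RAMDirectory() };
        for (int i = 0; i < subIndexes.length; i++) {
          IndexWriter sub = new IndexWriter(subIndexes[i], false, new WhitespaceAnalyzer(), true, null);
          // ... sub.addDocument(...) calls for this sub-collection ...
          sub.close();
        }

        // Merge the sub-indexes into the target index; the writer
        // runs this as one transaction, per startTransaction() above.
        Directory target = new RAMDirectory();
        IndexWriter writer = new IndexWriter(target, false, new WhitespaceAnalyzer(), true, null);
        writer.addIndexes(subIndexes);
        writer.close();
      }
    }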
maybeApplyDeletes(doMerge); doAfterFlush(); } + + checkpoint(); - if (!inTransaction) { - segmentInfos.write(directory); // commit before deleting - } else { - commitPending = true; - } - success = true; } finally { @@ -1589,11 +1780,10 @@ if (sourceSegments == ramSegmentInfos) { ramSegmentInfos.removeAllElements(); } - } else if (!inTransaction) { + } else { // Must rollback so our state matches index: - - if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) { + if (sourceSegments == ramSegmentInfos && !anyDeletes) { // Simple case: newSegment may or may not have // been added to the end of our segment infos, // so just check & remove if so: @@ -1611,14 +1801,8 @@ segmentInfos.addAll(rollback); } - // Erase any pending files that we were going to delete: - // i.e. old del files added by SegmentReader.doCommit() - deleter.clearPendingFiles(); - - // Delete any partially created files: - deleter.deleteFile(nextSegmentsFileName); - deleter.findDeletableFiles(); - deleter.deleteFiles(); + // Delete any partially created and now unreferenced files: + deleter.refresh(); } } } finally { @@ -1626,53 +1810,33 @@ if (doMerge) merger.closeReaders(); } - if (!inTransaction) { - // Attempt to delete all files we just obsoleted: - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete); // delete now-unused segments - // Includes the old del files - deleter.commitPendingFiles(); - } else { - deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments - } + // Delete the RAM segments + deleter.deleteDirect(ramDirectory, ramSegmentsToDelete); + // Give deleter a chance to remove files now. + deleter.checkpoint(segmentInfos, autoCommit); + if (useCompoundFile && doMerge) { - segmentsInfosFileName = nextSegmentsFileName; - nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - - Vector filesToDelete; - boolean success = false; try { - filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + merger.createCompoundFile(mergedName + ".cfs"); newSegment.setUseCompoundFile(true); - if (!inTransaction) { - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file - } + checkpoint(); success = true; } finally { - if (!success && !inTransaction) { + if (!success) { // Must rollback: newSegment.setUseCompoundFile(false); - deleter.deleteFile(mergedName + ".cfs"); - deleter.deleteFile(nextSegmentsFileName); + deleter.refresh(); } } - - if (!inTransaction) { - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - } - - // We can delete these segments whether or not we are - // in a transaction because we had just written them - // above so they can't need protection by the - // transaction: - deleter.deleteFiles(filesToDelete); // delete now-unused segments + + // Give deleter a chance to remove files now. 
+ deleter.checkpoint(segmentInfos, autoCommit); } return mergedDocCount; @@ -1692,7 +1856,6 @@ IndexReader reader = null; try { reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1)); - reader.setDeleter(deleter); // Apply delete terms to the segment just flushed from ram // apply appropriately so that a delete term is only applied to @@ -1718,7 +1881,6 @@ IndexReader reader = null; try { reader = SegmentReader.get(segmentInfos.info(i)); - reader.setDeleter(deleter); // Apply delete terms to disk segments // except the one just flushed from ram. @@ -1769,7 +1931,7 @@ } // Number of ram segments a delete term applies to. - private class Num { + private static class Num { private int num; Num(int num) { Index: src/java/org/apache/lucene/index/DocumentWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentWriter.java (revision 516341) +++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy) @@ -388,6 +388,9 @@ this.infoStream = infoStream; } + int getNumFields() { + return fieldInfos.size(); + } } final class Posting { // info about a Term in a doc Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 516341) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -18,286 +18,488 @@ */ import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.IndexFileNameFilter; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import java.io.IOException; import java.io.PrintStream; -import java.util.Vector; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; +import java.util.ArrayList; +import java.util.Collections; -/** - * A utility class (used by both IndexReader and - * IndexWriter) to keep track of files that need to be - * deleted because they are no longer referenced by the - * index. +/* + * This class keeps track of each SegmentInfos instance that + * is still "live", either because it corresponds to a + * segments_N in the Directory (a real commit) or because + * it's the in-memory SegmentInfos that a writer is actively + * updating but has not yet committed (currently this only + * applies when autoCommit=false in IndexWriter). This + * class uses simple reference counting to map the live + * SegmentInfos instances to individual files in the + * Directory. + * + * A separate deletion policy interface + * (IndexDeletionPolicy) is consulted on creation (onInit) + * and once per commit (onCommit), to decide when a commit + * should be removed. + * + * The current default deletion policy is {@link + * KeepOnlyLastCommitDeletionPolicy}, which removes all + * prior commits when a new commit has completed. This + * matches the behavior before 2.2. + * + * Note that you must hold the write.lock before + * instantiating this class. It opens segments_N file(s) + * directly with no retry logic. */ + final class IndexFileDeleter { - private Vector deletable; - private HashSet pending; + + /* Files that we tried to delete but failed (likely + * because they are open and we are running on Windows), + * so we will retry them again later: */ + private List deletable; + + /* Reference count for all files in the index. 
Maps + * String to RefCount (class below) instances: */ + private HashMap refCounts = new HashMap(); + + /* Holds all commits (segments_N) currently in the index. + * This will have just 1 commit if you are using the + * default delete policy (KeepOnlyLastCommitDeletionPolicy). + * Other policies may leave commit points live for longer + * in which case this list would be longer than 1: */ + private List commits = new ArrayList(); + + /* Holds files we had incref'd from the previous + * non-commit checkpoint: */ + private List lastFiles = new ArrayList(); + + private PrintStream infoStream; + private List toDelete = new ArrayList(); private Directory directory; - private SegmentInfos segmentInfos; - private PrintStream infoStream; + private IndexDeletionPolicy policy; - IndexFileDeleter(SegmentInfos segmentInfos, Directory directory) - throws IOException { - this.segmentInfos = segmentInfos; - this.directory = directory; - } - void setSegmentInfos(SegmentInfos segmentInfos) { - this.segmentInfos = segmentInfos; - } - SegmentInfos getSegmentInfos() { - return segmentInfos; - } - void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; } + + private void message(String message) { + infoStream.println(this + " " + Thread.currentThread().getName() + ": " + message); + } - /** Determine index files that are no longer referenced - * and therefore should be deleted. This is called once - * (by the writer), and then subsequently we add onto - * deletable any files that are no longer needed at the - * point that we create the unused file (eg when merging - * segments), and we only remove from deletable when a - * file is successfully deleted. + /** + * Initialize the deleter: find all previous commits in + * the Directory, incref the files they reference, call + * the policy to let it delete commits. The incoming + * segmentInfos must have been loaded from a commit point + * and not yet modified. This will remove any files not + * referenced by any of the commits. + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error */ + public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream) + throws CorruptIndexException, IOException { - void findDeletableFiles() throws IOException { + this.infoStream = infoStream; + this.policy = policy; + this.directory = directory; - // Gather all "current" segments: - HashMap current = new HashMap(); - for(int j=0;j 0) { - for (int i = 0; i < segments.size(); i++) { - SegmentReader reader = (SegmentReader)segments.elementAt(i); - if (reader.directory() == this.directory) - deleteFiles(reader.files()); // try to delete our files - else - deleteFiles(reader.files(), reader.directory()); // delete other files + // First decref all files that had been referred to by + // the now-deleted commits: + for(int i=0;i writeTo) { + commits.remove(size-1); + size--; + } } } /** - * Delete these segments, as long as they are not listed - * in protectedSegments. If they are, then, instead, add - * them to the pending set. - */ - - void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException { + * Writer calls this when it has hit an error and had to + * roll back, to tell us that there may now be + * unreferenced files in the filesystem. 
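To make the policy hook concrete, here is a sketch of a custom IndexDeletionPolicy that keeps the last N commits instead of only the most recent one. This is not part of the patch; it assumes each element of the commits list is a commit point exposing delete(), and that the list is ordered oldest first, as in the IndexCommitPoint API that later shipped. The corresponding calls in KeepOnlyLastCommitDeletionPolicy are truncated in the patch text below.

    import java.util.List;
    import org.apache.lucene.index.IndexCommitPoint;  // assumed type; see note above
    import org.apache.lucene.index.IndexDeletionPolicy;

    public class KeepLastNCommitsDeletionPolicy implements IndexDeletionPolicy {

      private int n;

      public KeepLastNCommitsDeletionPolicy(int n) {
        this.n = n;
      }

      public void onInit(List commits) {
        onCommit(commits);
      }

      public void onCommit(List commits) {
        // Delete every commit point except the newest n; the list
        // is assumed sorted by generation, oldest first.
        int size = commits.size();
        for (int i = 0; i < size - n; i++) {
          ((IndexCommitPoint) commits.get(i)).delete();
        }
      }
    }

Passed as the deletionPolicy argument of the new IndexWriter constructor, such a policy would let a reader on another NFS client keep using a slightly stale commit until its files are reclaimed.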
So we re-list + * the filesystem and delete such files: + */ + public void refresh() throws IOException { + String[] files = directory.list(); + if (files == null) + throw new IOException("cannot read directory " + directory + ": list() returned null"); + IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); + for(int i=0;i 0) { + for(int i=0;i commit.gen) { + return 1; + } else { + return 0; + } + } } } Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 516341) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -21,6 +21,8 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.IndexInput; import java.io.IOException; +import java.util.List; +import java.util.ArrayList; final class SegmentInfo { public String name; // unique name in dir @@ -50,6 +52,9 @@ // and true for newly created merged segments (both // compound and non compound). + private List files; // cached list of files that this segment uses + // in the Directory + public SegmentInfo(String name, int docCount, Directory dir) { this.name = name; this.docCount = docCount; @@ -71,6 +76,7 @@ * Copy everything from src SegmentInfo into our instance. */ void reset(SegmentInfo src) { + files = null; name = src.name; docCount = src.docCount; dir = src.dir; @@ -134,7 +140,7 @@ if (!preLockless) { // This is a FORMAT_LOCKLESS segment, which means - // there are no norms: + // there are no separate norms: for(int i=0;i 0 || dir.fileExists(delFileName))) { + files.add(delFileName); + } + + // Careful logic for norms files: + if (normGen != null) { + for(int i=0;i 0) { + // Definitely a separate norm file, with generation: + files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); + } else if (-1 == gen) { + // No separate norms but maybe non-separate norms + // in the non compound file case: + if (!hasSingleNormFile && !useCompoundFile) { + String fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + if (dir.fileExists(fileName)) { + files.add(fileName); + } + } + } else if (0 == gen) { + // Pre-2.1: we have to check file existence + String fileName = null; + if (useCompoundFile) { + fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; + } else if (!hasSingleNormFile) { + fileName = name + "." + IndexFileNames.SINGLE_NORMS_EXTENSION + i; + } + if (fileName != null && dir.fileExists(fileName)) { + files.add(fileName); + } + } + } + } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { + // Pre-2.1: we have to scan the dir to find all + // matching _X.sN/_X.fN files for our segment: + String prefix; + if (useCompoundFile) + prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; + else + prefix = name + "." 
+ IndexFileNames.SINGLE_NORMS_EXTENSION; + int prefixLength = prefix.length(); + String[] allFiles = dir.list(); + if (allFiles == null) + throw new IOException("cannot read directory " + dir + ": list() returned null"); + for(int i=0;i prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) { + files.add(fileName); + } + } + } + return files; + } } Index: src/java/org/apache/lucene/index/IndexFileNameFilter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNameFilter.java (revision 516341) +++ src/java/org/apache/lucene/index/IndexFileNameFilter.java (working copy) @@ -31,12 +31,17 @@ static IndexFileNameFilter singleton = new IndexFileNameFilter(); private HashSet extensions; + private HashSet extensionsInCFS; public IndexFileNameFilter() { extensions = new HashSet(); for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { extensions.add(IndexFileNames.INDEX_EXTENSIONS[i]); } + extensionsInCFS = new HashSet(); + for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE.length; i++) { + extensionsInCFS.add(IndexFileNames.INDEX_EXTENSIONS_IN_COMPOUND_FILE[i]); + } } /* (non-Javadoc) @@ -72,10 +77,7 @@ int i = name.lastIndexOf('.'); if (i != -1) { String extension = name.substring(1+i); - if (extensions.contains(extension) && - !extension.equals("del") && - !extension.equals("gen") && - !extension.equals("cfs")) { + if (extensionsInCFS.contains(extension)) { return true; } if (extension.startsWith("f") && Index: src/java/org/apache/lucene/index/SegmentReader.java =================================================================== --- src/java/org/apache/lucene/index/SegmentReader.java (revision 516341) +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) @@ -77,15 +77,6 @@ private void reWrite(SegmentInfo si) throws IOException { // NOTE: norms are re-written in regular directory, not cfs - - String oldFileName = si.getNormFileName(this.number); - if (oldFileName != null && !oldFileName.endsWith("." + IndexFileNames.NORMS_EXTENSION)) { - // Mark this file for deletion. Note that we don't - // actually try to delete it until the new segments files is - // successfully written: - deleter.addPendingFile(oldFileName); - } - si.advanceNormGen(this.number); IndexOutput out = directory().createOutput(si.getNormFileName(this.number)); try { @@ -227,14 +218,6 @@ protected void doCommit() throws IOException { if (deletedDocsDirty) { // re-write deleted - String oldDelFileName = si.getDelFileName(); - if (oldDelFileName != null) { - // Mark this file for deletion. Note that we don't - // actually try to delete it until the new segments files is - // successfully written: - deleter.addPendingFile(oldDelFileName); - } - si.advanceDelGen(); // We can write directly to the actual name (vs to a @@ -243,13 +226,6 @@ deletedDocs.write(directory(), si.getDelFileName()); } if (undeleteAll && si.hasDeletions()) { - String oldDelFileName = si.getDelFileName(); - if (oldDelFileName != null) { - // Mark this file for deletion. 
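The generation logic that files() walks above follows the lockless-commits naming scheme. For reference, a sketch of the name mapping (a reconstruction of what IndexFileNames.fileNameFromGeneration does, not code from this patch): generation -1 means the file does not exist, 0 means a pre-2.1 name whose existence must be tested, and N > 0 is appended to the segment name in base 36.

    // Reconstruction for illustration; see the note above.
    static String fileNameFromGeneration(String base, String extension, long gen) {
      if (gen == -1) {
        return null;                 // no such file
      } else if (gen == 0) {
        return base + extension;     // pre-2.1 name, e.g. "_7.del"
      } else {
        // e.g. "_7_3.del": segment _7, deletion generation 3
        return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension;
      }
    }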
Note that we don't - // actually try to delete it until the new segments files is - // successfully written: - deleter.addPendingFile(oldDelFileName); - } si.clearDelGen(); } if (normsDirty) { // re-write norms @@ -320,37 +296,7 @@ } Vector files() throws IOException { - Vector files = new Vector(16); - - if (si.getUseCompoundFile()) { - String name = segment + ".cfs"; - if (directory().fileExists(name)) { - files.addElement(name); - } - } else { - for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) { - String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i]; - if (directory().fileExists(name)) - files.addElement(name); - } - } - - if (si.hasDeletions()) { - files.addElement(si.getDelFileName()); - } - - boolean addedNrm = false; - for (int i = 0; i < fieldInfos.size(); i++) { - String name = si.getNormFileName(i); - if (name != null && directory().fileExists(name)) { - if (name.endsWith("." + IndexFileNames.NORMS_EXTENSION)) { - if (addedNrm) continue; // add .nrm just once - addedNrm = true; - } - files.addElement(name); - } - } - return files; + return new Vector(si.files()); } public TermEnum terms() { Index: src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java =================================================================== --- src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0) +++ src/java/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java (revision 0) @@ -0,0 +1,50 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; + +/** + * This {@link IndexDeletionPolicy} implementation that + * keeps only the most recent commit and immediately removes + * all prior commits after a new commit is done. This is + * the default deletion policy. + */ + +public final class KeepOnlyLastCommitDeletionPolicy implements IndexDeletionPolicy { + + /** + * Deletes all commits except the most recent one. + */ + public void onInit(List commits) { + // Note that commits.size() should normally be 1: + onCommit(commits); + } + + /** + * Deletes all commits except the most recent one. + */ + public void onCommit(List commits) { + // Note that commits.size() should normally be 2 (if not + // called by onInit above): + int size = commits.size(); + for(int i=0;iIndexDeletionPolicy + is in use). This file lists each segment by name, has details about the separate norms and deletion files, and also contains the size of each segment.