Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 516341)
+++ CHANGES.txt (working copy)
@@ -20,6 +20,15 @@
classes, package-private again (they were unnecessarily made public
as part of LUCENE-701). (Mike McCandless)
+ 3. LUCENE-710: added optional autoCommit boolean to IndexWriter
+ constructors. When this is false, index changes are not committed
+ until the writer is closed. This gives explicit control over when
+ a reader will see the changes. Also added optional custom
+ deletion policy to explicitly control when prior commits are
+ removed from the index. This is intended to allow applications to
+ share an index over NFS by customizing when prior commits are
+ deleted. (Mike McCandless)
+
Bug fixes
1. LUCENE-804: Fixed build.xml to pack a fully compilable src dist. (Doron Cohen)
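For context on the API this entry describes, a minimal sketch of an autoCommit=false writer (editorial illustration, not part of the patch; the path, analyzer and field values are placeholders):

    import java.io.IOException;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class AutoCommitExample {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.getDirectory("/path/to/index");
        // autoCommit=false: flushed files are still written to the Directory,
        // but no new segments_N is committed until close(), so readers that
        // re-open the index see either none or all of these changes.
        IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.UN_TOKENIZED));
        writer.addDocument(doc);
        writer.close();   // the single commit point for all buffered changes
        dir.close();
      }
    }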
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 516341)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (working copy)
@@ -68,7 +68,7 @@
if (realUsage > dir.maxUsedSize) {
dir.maxUsedSize = realUsage;
}
- throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes");
+ throw new IOException("fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes");
} else {
super.flushBuffer(src, len);
}
Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 516341)
+++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy)
@@ -25,175 +25,259 @@
"Venice has lots of canals" };
String[] text = { "Amsterdam", "Venice" };
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setUseCompoundFile(true);
- modifier.setMaxBufferedDeleteTerms(1);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- for (int i = 0; i < keywords.length; i++) {
- Document doc = new Document();
- doc.add(new Field("id", keywords[i], Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- doc.add(new Field("country", unindexed[i], Field.Store.YES,
- Field.Index.NO));
- doc.add(new Field("contents", unstored[i], Field.Store.NO,
- Field.Index.TOKENIZED));
- doc
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setUseCompoundFile(true);
+ modifier.setMaxBufferedDeleteTerms(1);
+
+ for (int i = 0; i < keywords.length; i++) {
+ Document doc = new Document();
+ doc.add(new Field("id", keywords[i], Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ doc.add(new Field("country", unindexed[i], Field.Store.YES,
+ Field.Index.NO));
+ doc.add(new Field("contents", unstored[i], Field.Store.NO,
+ Field.Index.TOKENIZED));
+ doc
.add(new Field("city", text[i], Field.Store.YES,
- Field.Index.TOKENIZED));
- modifier.addDocument(doc);
- }
- modifier.optimize();
+ Field.Index.TOKENIZED));
+ modifier.addDocument(doc);
+ }
+ modifier.optimize();
- Term term = new Term("city", "Amsterdam");
- int hitCount = getHitCount(dir, term);
- assertEquals(1, hitCount);
- modifier.deleteDocuments(term);
- hitCount = getHitCount(dir, term);
- assertEquals(0, hitCount);
+ if (!autoCommit) {
+ modifier.close();
+ }
- modifier.close();
+ Term term = new Term("city", "Amsterdam");
+ int hitCount = getHitCount(dir, term);
+ assertEquals(1, hitCount);
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+ modifier.setUseCompoundFile(true);
+ }
+ modifier.deleteDocuments(term);
+ if (!autoCommit) {
+ modifier.close();
+ }
+ hitCount = getHitCount(dir, term);
+ assertEquals(0, hitCount);
+
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms only apply to disk segments
public void testNonRAMDelete() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(2);
- modifier.setMaxBufferedDeleteTerms(2);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- int id = 0;
- int value = 100;
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
- for (int i = 0; i < 7; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ int id = 0;
+ int value = 100;
- assertEquals(0, modifier.getRamSegmentCount());
- assertTrue(0 < modifier.getSegmentCount());
+ for (int i = 0; i < 7; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(7, reader.numDocs());
- reader.close();
+ assertEquals(0, modifier.getRamSegmentCount());
+ assertTrue(0 < modifier.getSegmentCount());
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ if (!autoCommit) {
+ modifier.close();
+ }
- reader = IndexReader.open(dir);
- assertEquals(0, reader.numDocs());
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(7, reader.numDocs());
+ reader.close();
- modifier.close();
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
+
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+
+ if (!autoCommit) {
+ modifier.close();
+ }
+
+ reader = IndexReader.open(dir);
+ assertEquals(0, reader.numDocs());
+ reader.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms only apply to ram segments
public void testRAMDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(4);
- modifier.setMaxBufferedDeleteTerms(4);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(4);
+ modifier.setMaxBufferedDeleteTerms(4);
- int id = 0;
- int value = 100;
+ int id = 0;
+ int value = 100;
- addDoc(modifier, ++id, value);
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- addDoc(modifier, ++id, value);
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ addDoc(modifier, ++id, value);
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
+ addDoc(modifier, ++id, value);
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- assertEquals(2, modifier.getNumBufferedDeleteTerms());
- assertEquals(1, modifier.getBufferedDeleteTermsSize());
+ assertEquals(2, modifier.getNumBufferedDeleteTerms());
+ assertEquals(1, modifier.getBufferedDeleteTermsSize());
- addDoc(modifier, ++id, value);
- assertEquals(0, modifier.getSegmentCount());
- modifier.flush();
+ addDoc(modifier, ++id, value);
+ assertEquals(0, modifier.getSegmentCount());
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(1, reader.numDocs());
+ if (!autoCommit) {
+ modifier.close();
+ }
- int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
- assertEquals(1, hitCount);
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(1, reader.numDocs());
- modifier.close();
+ int hitCount = getHitCount(dir, new Term("id", String.valueOf(id)));
+ assertEquals(1, hitCount);
+ reader.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
// test when delete terms apply to both disk and ram segments
public void testBothDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(100);
- modifier.setMaxBufferedDeleteTerms(100);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- int id = 0;
- int value = 100;
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(100);
+ modifier.setMaxBufferedDeleteTerms(100);
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
+ int id = 0;
+ int value = 100;
- value = 200;
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
- for (int i = 0; i < 5; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.flush();
+ value = 200;
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
- IndexReader reader = IndexReader.open(dir);
- assertEquals(5, reader.numDocs());
+ for (int i = 0; i < 5; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.deleteDocuments(new Term("value", String.valueOf(value)));
- modifier.close();
+ modifier.flush();
+ if (!autoCommit) {
+ modifier.close();
+ }
+
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(5, reader.numDocs());
+ if (autoCommit) {
+ modifier.close();
+ }
+ }
}
// test that batched delete terms are flushed together
public void testBatchDeletes() throws IOException {
- Directory dir = new RAMDirectory();
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), true);
- modifier.setMaxBufferedDocs(2);
- modifier.setMaxBufferedDeleteTerms(2);
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
+ Directory dir = new RAMDirectory();
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
- int id = 0;
- int value = 100;
+ int id = 0;
+ int value = 100;
- for (int i = 0; i < 7; i++) {
- addDoc(modifier, ++id, value);
- }
- modifier.flush();
+ for (int i = 0; i < 7; i++) {
+ addDoc(modifier, ++id, value);
+ }
+ modifier.flush();
+ if (!autoCommit) {
+ modifier.close();
+ }
- IndexReader reader = IndexReader.open(dir);
- assertEquals(7, reader.numDocs());
- reader.close();
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals(7, reader.numDocs());
+ reader.close();
+
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
- id = 0;
- modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
- modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+ id = 0;
+ modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
+ modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
- reader = IndexReader.open(dir);
- assertEquals(5, reader.numDocs());
- reader.close();
+ if (!autoCommit) {
+ modifier.close();
+ }
- Term[] terms = new Term[3];
- for (int i = 0; i < terms.length; i++) {
- terms[i] = new Term("id", String.valueOf(++id));
- }
- modifier.deleteDocuments(terms);
+ reader = IndexReader.open(dir);
+ assertEquals(5, reader.numDocs());
+ reader.close();
- reader = IndexReader.open(dir);
- assertEquals(2, reader.numDocs());
- reader.close();
+ Term[] terms = new Term[3];
+ for (int i = 0; i < terms.length; i++) {
+ terms[i] = new Term("id", String.valueOf(++id));
+ }
+ if (!autoCommit) {
+ modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
+ modifier.setMaxBufferedDocs(2);
+ modifier.setMaxBufferedDeleteTerms(2);
+ }
+ modifier.deleteDocuments(terms);
+ if (!autoCommit) {
+ modifier.close();
+ }
+ reader = IndexReader.open(dir);
+ assertEquals(2, reader.numDocs());
+ reader.close();
- modifier.close();
+ if (autoCommit) {
+ modifier.close();
+ }
+ dir.close();
+ }
}
private void addDoc(IndexWriter modifier, int id, int value)
@@ -233,201 +317,203 @@
int START_COUNT = 157;
int END_COUNT = 144;
- // First build up a starting index:
- RAMDirectory startDir = new RAMDirectory();
- IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(),
- true);
- for (int i = 0; i < 157; i++) {
- Document d = new Document();
- d.add(new Field("id", Integer.toString(i), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- d.add(new Field("content", "aaa " + i, Field.Store.NO,
- Field.Index.TOKENIZED));
- writer.addDocument(d);
- }
- writer.close();
+ for(int pass=0;pass<2;pass++) {
+ boolean autoCommit = (0==pass);
- long diskUsage = startDir.sizeInBytes();
- long diskFree = diskUsage + 10;
+ // First build up a starting index:
+ RAMDirectory startDir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(startDir, autoCommit,
+ new WhitespaceAnalyzer(), true);
+ for (int i = 0; i < 157; i++) {
+ Document d = new Document();
+ d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ d.add(new Field("content", "aaa " + i, Field.Store.NO,
+ Field.Index.TOKENIZED));
+ writer.addDocument(d);
+ }
+ writer.close();
- IOException err = null;
+ long diskUsage = startDir.sizeInBytes();
+ long diskFree = diskUsage + 10;
- boolean done = false;
+ IOException err = null;
- // Iterate w/ ever increasing free disk space:
- while (!done) {
- MockRAMDirectory dir = new MockRAMDirectory(startDir);
- IndexWriter modifier = new IndexWriter(dir,
- new WhitespaceAnalyzer(), false);
+ boolean done = false;
- modifier.setMaxBufferedDocs(1000); // use flush or close
- modifier.setMaxBufferedDeleteTerms(1000); // use flush or close
+ // Iterate w/ ever increasing free disk space:
+ while (!done) {
+ MockRAMDirectory dir = new MockRAMDirectory(startDir);
+ IndexWriter modifier = new IndexWriter(dir, autoCommit,
+ new WhitespaceAnalyzer());
- // For each disk size, first try to commit against
- // dir that will hit random IOExceptions & disk
- // full; after, give it infinite disk space & turn
- // off random IOExceptions & retry w/ same reader:
- boolean success = false;
+ modifier.setMaxBufferedDocs(1000); // use flush or close
+ modifier.setMaxBufferedDeleteTerms(1000); // use flush or close
- for (int x = 0; x < 2; x++) {
+ // For each disk size, first try to commit against
+ // dir that will hit random IOExceptions & disk
+ // full; after, give it infinite disk space & turn
+ // off random IOExceptions & retry w/ same reader:
+ boolean success = false;
- double rate = 0.1;
- double diskRatio = ((double)diskFree) / diskUsage;
- long thisDiskFree;
- String testName;
+ for (int x = 0; x < 2; x++) {
- if (0 == x) {
- thisDiskFree = diskFree;
- if (diskRatio >= 2.0) {
- rate /= 2;
- }
- if (diskRatio >= 4.0) {
- rate /= 2;
- }
- if (diskRatio >= 6.0) {
+ double rate = 0.1;
+ double diskRatio = ((double)diskFree) / diskUsage;
+ long thisDiskFree;
+ String testName;
+
+ if (0 == x) {
+ thisDiskFree = diskFree;
+ if (diskRatio >= 2.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 4.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 6.0) {
+ rate = 0.0;
+ }
+ if (debug) {
+ System.out.println("\ncycle: " + diskFree + " bytes");
+ }
+ testName = "disk full during reader.close() @ " + thisDiskFree
+ + " bytes";
+ } else {
+ thisDiskFree = 0;
rate = 0.0;
+ if (debug) {
+ System.out.println("\ncycle: same writer: unlimited disk space");
+ }
+ testName = "reader re-use after disk full";
}
- if (debug) {
- System.out.println("\ncycle: " + diskFree + " bytes");
+
+ dir.setMaxSizeInBytes(thisDiskFree);
+ dir.setRandomIOExceptionRate(rate, diskFree);
+
+ try {
+ if (0 == x) {
+ int docId = 12;
+ for (int i = 0; i < 13; i++) {
+ if (updates) {
+ Document d = new Document();
+ d.add(new Field("id", Integer.toString(i), Field.Store.YES,
+ Field.Index.UN_TOKENIZED));
+ d.add(new Field("content", "bbb " + i, Field.Store.NO,
+ Field.Index.TOKENIZED));
+ modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
+ } else { // deletes
+ modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
+ // modifier.setNorm(docId, "contents", (float)2.0);
+ }
+ docId += 12;
+ }
+ }
+ modifier.close();
+ success = true;
+ if (0 == x) {
+ done = true;
+ }
}
- testName = "disk full during reader.close() @ " + thisDiskFree
- + " bytes";
- } else {
- thisDiskFree = 0;
- rate = 0.0;
- if (debug) {
- System.out.println("\ncycle: same writer: unlimited disk space");
+ catch (IOException e) {
+ if (debug) {
+ System.out.println(" hit IOException: " + e);
+ }
+ err = e;
+ if (1 == x) {
+ e.printStackTrace();
+ fail(testName + " hit IOException after disk space was freed up");
+ }
}
- testName = "reader re-use after disk full";
- }
- dir.setMaxSizeInBytes(thisDiskFree);
- dir.setRandomIOExceptionRate(rate, diskFree);
+ // Whether we succeeded or failed, check that all
+ // un-referenced files were in fact deleted (ie,
+ // we did not create garbage). Just create a
+ // new IndexFileDeleter, have it delete
+ // unreferenced files, then verify that in fact
+ // no files were deleted:
+ String[] startFiles = dir.list();
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ IndexFileDeleter d = new IndexFileDeleter(dir, new KeepOnlyLastCommitDeletionPolicy(), infos, null);
+ String[] endFiles = dir.list();
- try {
- if (0 == x) {
- int docId = 12;
- for (int i = 0; i < 13; i++) {
- if (updates) {
- Document d = new Document();
- d.add(new Field("id", Integer.toString(i), Field.Store.YES,
- Field.Index.UN_TOKENIZED));
- d.add(new Field("content", "bbb " + i, Field.Store.NO,
- Field.Index.TOKENIZED));
- modifier.updateDocument(new Term("id", Integer.toString(docId)), d);
- } else { // deletes
- modifier.deleteDocuments(new Term("id", Integer.toString(docId)));
- // modifier.setNorm(docId, "contents", (float)2.0);
- }
- docId += 12;
+ Arrays.sort(startFiles);
+ Arrays.sort(endFiles);
+
+ // for(int i=0;i
+ * One expected use case for this (and the reason why it
+ * was first created) is to work around problems with an
+ * index directory accessed via filesystems like NFS because
+ * NFS does not provide the "delete on last close" semantics
+ * that Lucene's "point in time" search normally relies on.
+ * By implementing a custom deletion policy, such as "a
+ * commit is only removed once it has been stale for more
+ * than X minutes", you can give your readers time to
+ * refresh to the new commit before {@link IndexWriter}
+ * removes the old commits.  Note that doing so will
+ * increase the storage requirements of the index.  See LUCENE-710
+ * for details.
+ */
+
+public interface IndexDeletionPolicy {
+
+  /**
+   * This is called once when a writer is first
+   * instantiated to give the policy a chance to remove old
+   * commit points.
+   *
+   * The writer locates all commits present in the index
+   * and calls this method.  The policy may choose to delete
+   * commit points.  To delete a commit point, call the
+   * {@link IndexCommitPoint#delete} method.
+   *
+   * @param commits List of {@link IndexCommitPoint},
+   *  sorted by age (the 0th one is the oldest commit).
+   */
+  public void onInit(List commits) throws IOException;
+
+  /**
+   * This is called each time the writer commits.  This
+   * gives the policy a chance to remove old commit points
+   * with each commit.
+   *
+   * If writer has autoCommit = true then
+ * this method will in general be called many times during
+ * one instance of {@link IndexWriter}. If
+ * autoCommit = false then this method is
+ * only called once when {@link IndexWriter#close} is
+ * called, or not at all if the {@link IndexWriter#abort}
+ * is called. The policy may now choose to delete old
+ * commit points by calling {@link IndexCommitPoint#delete}.
+ *
+   * @param commits List of {@link IndexCommitPoint},
+ * sorted by age (the 0th one is the oldest commit).
+ */
+ public void onCommit(List commits) throws IOException;
+}
Property changes on: src/java/org/apache/lucene/index/IndexDeletionPolicy.java
___________________________________________________________________
Name: svn:eol-style
+ native
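As an editorial illustration of the interface added above (not part of the patch), a sketch of a custom policy; the class name and the "keep the two newest commits" rule are hypothetical:

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.IndexCommitPoint;
    import org.apache.lucene.index.IndexDeletionPolicy;

    // Hypothetical policy: always keep the two newest commits, delete the rest.
    public class KeepLastTwoCommitsDeletionPolicy implements IndexDeletionPolicy {

      public void onInit(List commits) throws IOException {
        deleteOldCommits(commits);
      }

      public void onCommit(List commits) throws IOException {
        deleteOldCommits(commits);
      }

      // The list is sorted by age (the 0th entry is the oldest commit), so
      // everything except the last two entries can be marked for deletion.
      private void deleteOldCommits(List commits) {
        for (int i = 0; i < commits.size() - 2; i++) {
          ((IndexCommitPoint) commits.get(i)).delete();
        }
      }
    }

A policy along these lines would be handed to the new IndexWriter constructors (or the IndexReader.open overload) introduced elsewhere in this patch.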
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 516341)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -220,13 +220,6 @@
return new MultiTermPositions(subReaders, starts);
}
- protected void setDeleter(IndexFileDeleter deleter) {
- // Share deleter to our SegmentReaders:
- this.deleter = deleter;
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].setDeleter(deleter);
- }
-
protected void doCommit() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 516341)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -114,7 +114,7 @@
private Directory directory;
private boolean directoryOwner;
private boolean closeDirectory;
- protected IndexFileDeleter deleter;
+ private IndexDeletionPolicy deletionPolicy;
private boolean isClosed;
private SegmentInfos segmentInfos;
@@ -131,30 +131,44 @@
path.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- */
+ * @param path the path to the index directory */
public static IndexReader open(String path) throws CorruptIndexException, IOException {
- return open(FSDirectory.getDirectory(path), true);
+ return open(FSDirectory.getDirectory(path), true, null);
}
/** Returns an IndexReader reading the index in an FSDirectory in the named
- path.
+ * path.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
- */
+ * @param path the path to the index directory */
public static IndexReader open(File path) throws CorruptIndexException, IOException {
- return open(FSDirectory.getDirectory(path), true);
+ return open(FSDirectory.getDirectory(path), true, null);
}
/** Returns an IndexReader reading the index in the given Directory.
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
+ * @param directory the index directory
*/
public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException {
- return open(directory, false);
+ return open(directory, false, null);
}
- private static IndexReader open(final Directory directory, final boolean closeDirectory) throws CorruptIndexException, IOException {
+ /** Expert: returns an IndexReader reading the index in the given
+ * Directory, with a custom {@link IndexDeletionPolicy}.
+ * @param directory the index directory
+ * @param deletionPolicy a custom deletion policy (only used
+ * if you use this reader to perform deletes or to set
+ * norms); see {@link IndexWriter} for details.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
+ */
+ public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
+ return open(directory, false, deletionPolicy);
+ }
+ private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException {
+
return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
@@ -162,8 +176,10 @@
SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
+ IndexReader reader;
+
if (infos.size() == 1) { // index is optimized
- return SegmentReader.get(infos, infos.info(0), closeDirectory);
+ reader = SegmentReader.get(infos, infos.info(0), closeDirectory);
} else {
// To reduce the chance of hitting FileNotFound
@@ -184,8 +200,10 @@
}
}
- return new MultiReader(directory, infos, closeDirectory, readers);
+ reader = new MultiReader(directory, infos, closeDirectory, readers);
}
+ reader.deletionPolicy = deletionPolicy;
+ return reader;
}
}.run();
}
@@ -715,20 +733,14 @@
*/
protected final synchronized void commit() throws IOException {
if(hasChanges){
- if (deleter == null) {
- // In the MultiReader case, we share this deleter
- // across all SegmentReaders:
- setDeleter(new IndexFileDeleter(segmentInfos, directory));
- }
if(directoryOwner){
- // Should not be necessary: no prior commit should
- // have left pending files, so just defensive:
- deleter.clearPendingFiles();
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ IndexFileDeleter deleter = new IndexFileDeleter(directory,
+ deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
+ segmentInfos, null);
- String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
- String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
// Checkpoint the state we are about to change, in
// case we have to roll back:
startCommit();
@@ -749,24 +761,16 @@
// actually in the index):
rollbackCommit();
- // Erase any pending files that we were going to delete:
- deleter.clearPendingFiles();
-
- // Remove possibly partially written next
- // segments file:
- deleter.deleteFile(nextSegmentsFileName);
-
// Recompute deletable files & remove them (so
// partially written .del files, etc, are
// removed):
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ deleter.refresh();
}
}
- // Attempt to delete all files we just obsoleted:
- deleter.deleteFile(oldInfoFileName);
- deleter.commitPendingFiles();
+ // Have the deleter remove any now unreferenced
+ // files due to this commit:
+ deleter.checkpoint(segmentInfos, true);
if (writeLock != null) {
writeLock.release(); // release write lock
@@ -779,13 +783,6 @@
hasChanges = false;
}
- protected void setDeleter(IndexFileDeleter deleter) {
- this.deleter = deleter;
- }
- protected IndexFileDeleter getDeleter() {
- return deleter;
- }
-
/** Implements commit. */
protected abstract void doCommit() throws IOException;
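To illustrate the new open(Directory, IndexDeletionPolicy) overload above, an editorial sketch (not part of the patch); the path is a placeholder and the policy class is the hypothetical one sketched after IndexDeletionPolicy.java:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ReaderDeleteWithPolicy {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.getDirectory("/path/to/index");
        // The policy is only consulted because this reader commits changes
        // (deletes) when it is closed.
        IndexReader reader = IndexReader.open(dir, new KeepLastTwoCommitsDeletionPolicy());
        reader.deleteDocuments(new Term("id", "42"));
        reader.close();   // commit; the deleter asks the policy which commits to remove
        dir.close();
      }
    }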
Index: src/java/org/apache/lucene/index/IndexCommitPoint.java
===================================================================
--- src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexCommitPoint.java (revision 0)
@@ -0,0 +1,41 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Represents a single commit into an index as seen by the
+ * {@link IndexDeletionPolicy}.
+ */
+
+public interface IndexCommitPoint {
+
+ /**
+ * Get the segments file (ie, segments_N) of
+ * this commit point.
+ */
+ public String getSegmentsFileName();
+
+ /**
+ * Notify the writer that this commit point should be
+ * deleted. This should only be called by the {@link
+ * IndexDeletionPolicy} during its {@link
+ * IndexDeletionPolicy#onInit} or {@link
+ * IndexDeletionPolicy#onCommit} method.
+ */
+ public void delete();
+}
Property changes on: src/java/org/apache/lucene/index/IndexCommitPoint.java
___________________________________________________________________
Name: svn:eol-style
+ native
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java (revision 516341)
+++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy)
@@ -37,7 +37,19 @@
/** Extension of norms file */
static final String NORMS_EXTENSION = "nrm";
-
+
+ /** Extension of compound file */
+ static final String COMPOUND_FILE_EXTENSION = "cfs";
+
+ /** Extension of deletes */
+ static final String DELETES_EXTENSION = "del";
+
+ /** Extension of single norms */
+ static final String SINGLE_NORMS_EXTENSION = "f";
+
+ /** Extension of separate norms */
+ static final String SEPARATE_NORMS_EXTENSION = "s";
+
/**
* This array contains all filename extensions used by
* Lucene's index files, with two exceptions, namely the
@@ -50,6 +62,13 @@
"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
"tvx", "tvd", "tvf", "gen", "nrm"
};
+
+ /** File extensions that are added to a compound file
+ * (same as above, minus "del", "gen", "cfs"). */
+ static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] {
+ "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx",
+ "tvx", "tvd", "tvf", "nrm"
+ };
/** File extensions of old-style index files */
static final String COMPOUND_EXTENSIONS[] = new String[] {
Index: src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfos.java (revision 516341)
+++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -88,16 +88,9 @@
for (int i = 0; i < files.length; i++) {
String file = files[i];
if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
- if (file.equals(IndexFileNames.SEGMENTS)) {
- // Pre lock-less commits:
- if (max == -1) {
- max = 0;
- }
- } else {
- long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX);
- if (v > max) {
- max = v;
- }
+ long gen = generationFromSegmentsFileName(file);
+ if (gen > max) {
+ max = gen;
}
}
}
@@ -152,6 +145,22 @@
}
/**
+ * Parse the generation off the segments file name and
+ * return it.
+ */
+ public static long generationFromSegmentsFileName(String fileName) {
+ if (fileName.equals(IndexFileNames.SEGMENTS)) {
+ return 0;
+ } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+ return Long.parseLong(fileName.substring(1+IndexFileNames.SEGMENTS.length()),
+ Character.MAX_RADIX);
+ } else {
+ throw new IllegalArgumentException("fileName \"" + fileName + "\" is not a segments file");
+ }
+ }
+
+
+ /**
* Get the next segments_N filename that will be written.
*/
public String getNextSegmentFileName() {
@@ -181,12 +190,8 @@
IndexInput input = directory.openInput(segmentFileName);
- if (segmentFileName.equals(IndexFileNames.SEGMENTS)) {
- generation = 0;
- } else {
- generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
- Character.MAX_RADIX);
- }
+ generation = generationFromSegmentsFileName(segmentFileName);
+
lastGeneration = generation;
try {
@@ -255,6 +260,8 @@
IndexOutput output = directory.createOutput(segmentFileName);
+ boolean success = false;
+
try {
output.writeInt(FORMAT_SINGLE_NORM_FILE); // write FORMAT
output.writeLong(++version); // every write changes
@@ -266,7 +273,16 @@
}
}
finally {
- output.close();
+ try {
+ output.close();
+ success = true;
+ } finally {
+ if (!success) {
+ // Try not to leave a truncated segments_N file in
+ // the index:
+ directory.deleteFile(segmentFileName);
+ }
+ }
}
try {
@@ -305,6 +321,9 @@
public long getVersion() {
return version;
}
+ public long getGeneration() {
+ return generation;
+ }
/**
* Current version number from segments file.
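A small illustration of the new generationFromSegmentsFileName helper (editorial, not part of the patch); since SegmentInfos is package-level, a snippet like this would live inside org.apache.lucene.index, for example in a test:

    package org.apache.lucene.index;

    public class GenerationExample {
      public static void main(String[] args) {
        // Pre-lockless "segments" maps to generation 0; otherwise the suffix
        // after "segments_" is parsed as a base-36 long.
        System.out.println(SegmentInfos.generationFromSegmentsFileName("segments"));    // 0
        System.out.println(SegmentInfos.generationFromSegmentsFileName("segments_1"));  // 1
        System.out.println(SegmentInfos.generationFromSegmentsFileName("segments_a"));  // 10
      }
    }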
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 516341)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -29,48 +29,100 @@
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.Vector;
-import java.util.HashSet;
+import java.util.ArrayList;
+import java.util.List;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
/**
- An IndexWriter creates and maintains an index.
+ An IndexWriter creates and maintains an index.
-  The third argument (create) to the
+  The create argument to the
constructor
determines whether a new index is created, or whether an existing index is
- opened for the addition of new documents. Note that you
- can open an index with create=true even while readers are
+ opened. Note that you
+ can open an index with create=true even while readers are
using the index. The old readers will continue to search
the "point in time" snapshot they had opened, and won't
-  see the newly created index until they re-open.
+  see the newly created index until they re-open.  There are
+  also constructors with no create argument which
+  will create a new index if there is not already an index at the
+  provided path and otherwise open the existing index.
-  In either case, documents are added with the addDocument method.
-  When finished adding documents, close should be called.
+  In either case, documents are added with addDocument
+  and removed with deleteDocuments.
+  A document can be updated with updateDocument
+  (which just deletes and then adds).  When finished adding, deleting and updating documents, close should be called.
+  These changes are buffered in memory and periodically
+  flushed to the {@link Directory} (during the above method calls).  A flush is triggered when there are
+  enough buffered deletes (see {@link
+  #setMaxBufferedDeleteTerms}) or enough added documents
+  (see {@link #setMaxBufferedDocs}) since the last flush,
+  whichever is sooner.  When a flush occurs, both pending
+  deletes and added documents are flushed to the index.  A
+  flush may also trigger one or more segment merges.
+
+  The optional autoCommit argument to the
+ constructors
+ controls visibility of the changes to {@link IndexReader} instances reading the same index.
+ When this is false, changes are not
+ visible until {@link #close()} is called.
+ Note that changes will still be flushed to the
+ {@link org.apache.lucene.store.Directory} as new files,
+ but are not committed (no new segments_N file
+ is written referencing the new files) until {@link #close} is
+ called. If something goes terribly wrong (for example the
+ JVM crashes) before {@link #close()}, then
+ the index will reflect none of the changes made (it will
+ remain in its starting state).
+ You can also call {@link #abort()}, which closes the writer without committing any
+ changes, and removes any index
+ files that had been flushed but are now unreferenced.
+ This mode is useful for preventing readers from refreshing
+ at a bad time (for example after you've done all your
+ deletes but before you've done your adds).
+ It can also be used to implement simple single-writer
+ transactional semantics ("all or none").
+  When autoCommit is true then
+ every flush is also a commit ({@link IndexReader}
+ instances will see each flush as changes to the index).
+ This is the default, to match the behavior before 2.2.
+ When running in this mode, be careful not to refresh your
+ readers while optimize or segment merges are taking place
+ as this can tie up substantial disk space.
If an index will not have more documents added for a while and optimal search performance is desired, then the optimize method should be called before the index is closed.
- -Opening an IndexWriter creates a lock file for the directory in use. Trying to open - another IndexWriter on the same directory will lead to a + +
Opening an IndexWriter creates a lock file for the directory in use. Trying to open
+ another IndexWriter on the same directory will lead to a
{@link LockObtainFailedException}. The {@link LockObtainFailedException}
is also thrown if an IndexReader on the same directory is used to delete documents
from the index.
As of 2.1, IndexWriter can now delete documents
- by {@link Term} (see {@link #deleteDocuments} ) and update
- (delete then add) documents (see {@link #updateDocument}).
- Deletes are buffered until {@link
- #setMaxBufferedDeleteTerms} Terms at which
- point they are flushed to the index. Note that a flush
- occurs when there are enough buffered deletes or enough
- added documents, whichever is sooner. When a flush
- occurs, both pending deletes and added documents are
- flushed to the index.
+  Expert: IndexWriter allows an optional
+ {@link IndexDeletionPolicy} implementation to be
+ specified. You can use this to control when prior commits
+ are deleted from the index. The default policy is {@link
+ KeepOnlyLastCommitDeletionPolicy} which removes all prior
+ commits as soon as a new commit is done (this matches
+ behavior before 2.2). Creating your own policy can allow
+ you to explicitly keep previous "point in time" commits
+ alive in the index for some time, to allow readers to
+ refresh to the new commit without having the old commit
+ deleted out from under them. This is necessary on
+ filesystems like NFS that do not support "delete on last
+ close" semantics, which Lucene's "point in time" search
+ normally relies on.
- * path, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with a.
+ * path, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * a.
*
* @param path the path to the index directory
* @param a the analyzer to use
@@ -309,18 +366,13 @@
*/
public IndexWriter(String path, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(path)) {
- init(path, a, false);
- } else {
- init(path, a, true);
- }
+ init(FSDirectory.getDirectory(path), a, true, null, true);
}
/**
* Constructs an IndexWriter for the index in
- * path, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with
+ * path, first creating it if it does not
+ * already exist. Text will be analyzed with
* a.
*
* @param path the path to the index directory
@@ -335,18 +387,14 @@
*/
public IndexWriter(File path, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(path)) {
- init(path, a, false);
- } else {
- init(path, a, true);
- }
+ init(FSDirectory.getDirectory(path), a, true, null, true);
}
/**
* Constructs an IndexWriter for the index in
- * d, creating it first if it does not
- * already exist, otherwise appending to the existing
- * index. Text will be analyzed with a.
+ * d, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * a.
*
* @param d the index directory
* @param a the analyzer to use
@@ -360,28 +408,124 @@
*/
public IndexWriter(Directory d, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- if (IndexReader.indexExists(d)) {
- init(d, a, false, false);
- } else {
- init(d, a, true, false);
- }
+ init(d, a, false, null, true);
}
- private void init(String path, Analyzer a, final boolean create)
+ /**
+ * Constructs an IndexWriter for the index in
+ * d, first creating it if it does not
+ * already exist. Text will be analyzed with
+ * a.
+ *
+ * @param d the index directory
+ * @param autoCommit see above
+ * @param a the analyzer to use
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(FSDirectory.getDirectory(path), a, create, true);
+ init(d, a, false, null, autoCommit);
}
- private void init(File path, Analyzer a, final boolean create)
+ /**
+ * Constructs an IndexWriter for the index in d.
+ * Text will be analyzed with a. If create
+ * is true, then a new, empty index will be created in
+ * d, replacing the index already there, if any.
+ *
+ * @param d the index directory
+ * @param autoCommit see above
+ * @param a the analyzer to use
+ * @param create true to create the index or overwrite
+ * the existing one; false to append to the existing
+ * index
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and create is
+ * false or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, create, false, null, autoCommit);
+ }
+
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in d,
+ * first creating it if it does not already exist. Text
+ * will be analyzed with a.
+ *
+ * @param d the index directory
+ * @param autoCommit see above
+ * @param a the analyzer to use
+ * @param deletionPolicy see above
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be
+ * read/written to or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy)
throws CorruptIndexException, LockObtainFailedException, IOException {
- init(FSDirectory.getDirectory(path), a, create, true);
+ init(d, a, false, deletionPolicy, autoCommit);
}
- private void init(Directory d, Analyzer a, final boolean create, boolean closeDir)
+ /**
+ * Expert: constructs an IndexWriter with a custom {@link
+ * IndexDeletionPolicy}, for the index in d.
+ * Text will be analyzed with a. If
+ * create is true, then a new, empty index
+ * will be created in d, replacing the index
+ * already there, if any.
+ *
+ * @param d the index directory
+ * @param autoCommit see above
+ * @param a the analyzer to use
+ * @param create true to create the index or overwrite
+ * the existing one; false to append to the existing
+ * index
+ * @param deletionPolicy see above
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws LockObtainFailedException if another writer
+ * has this index open (write.lock could not
+ * be obtained)
+ * @throws IOException if the directory cannot be read/written to, or
+ * if it does not exist and create is
+ * false or if there is any other low-level
+ * IO error
+ */
+ public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
+ init(d, a, create, false, deletionPolicy, autoCommit);
+ }
+
+ private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit)
throws CorruptIndexException, LockObtainFailedException, IOException {
+ if (IndexReader.indexExists(d)) {
+ init(d, a, false, closeDir, deletionPolicy, autoCommit);
+ } else {
+ init(d, a, true, closeDir, deletionPolicy, autoCommit);
+ }
+ }
+
+ private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit)
+ throws CorruptIndexException, LockObtainFailedException, IOException {
this.closeDir = closeDir;
directory = d;
analyzer = a;
+ this.infoStream = defaultInfoStream;
if (create) {
// Clear the write lock in case it's leftover:
@@ -410,13 +554,17 @@
segmentInfos.read(directory);
}
- // Create a deleter to keep track of which files can
- // be deleted:
- deleter = new IndexFileDeleter(segmentInfos, directory);
- deleter.setInfoStream(infoStream);
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ this.autoCommit = autoCommit;
+ if (!autoCommit) {
+ rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
+ }
+ // Default deleter (for backwards compatibility) is
+ // KeepOnlyLastCommitDeleter:
+ deleter = new IndexFileDeleter(directory,
+ deletionPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
+ segmentInfos, infoStream);
+
} catch (IOException e) {
this.writeLock.release();
this.writeLock = null;
@@ -533,11 +681,28 @@
return mergeFactor;
}
- /** If non-null, information about merges and a message when
- * maxFieldLength is reached will be printed to this.
+ /** If non-null, this will be the default infoStream used
+ * by a newly instantiated IndexWriter.
+ * @see #setInfoStream
*/
+ public static void setDefaultInfoStream(PrintStream infoStream) {
+ IndexWriter.defaultInfoStream = infoStream;
+ }
+
+ /**
+ * @see #setDefaultInfoStream
+ */
+ public static PrintStream getDefaultInfoStream() {
+ return IndexWriter.defaultInfoStream;
+ }
+
+ /** If non-null, information about merges, deletes and a
+ * message when maxFieldLength is reached will be printed
+ * to this.
+ */
public void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
+ deleter.setInfoStream(infoStream);
}
/**
@@ -613,6 +778,14 @@
*/
public synchronized void close() throws CorruptIndexException, IOException {
flushRamSegments();
+
+ if (commitPending) {
+ segmentInfos.write(directory); // now commit changes
+ deleter.checkpoint(segmentInfos, true);
+ commitPending = false;
+ rollbackSegmentInfos = null;
+ }
+
ramDirectory.close();
if (writeLock != null) {
writeLock.release(); // release write lock
@@ -737,7 +910,9 @@
dw.setInfoStream(infoStream);
String segmentName = newRamSegmentName();
dw.addDocument(segmentName, doc);
- return new SegmentInfo(segmentName, 1, ramDirectory, false, false);
+ SegmentInfo si = new SegmentInfo(segmentName, 1, ramDirectory, false, false);
+ si.setNumFields(dw.getNumFields());
+ return si;
}
/**
@@ -871,6 +1046,7 @@
*/
private PrintStream infoStream = null;
+ private static PrintStream defaultInfoStream = null;
/** Merges all segments together into a single segment,
* optimizing an index for search.
@@ -949,21 +1125,18 @@
* merges that happen (or ram segments flushed) will not
* write a new segments file and will not remove any files
* that were present at the start of the transaction. You
- * must make a matched (try/finall) call to
+ * must make a matched (try/finally) call to
* commitTransaction() or rollbackTransaction() to finish
* the transaction.
*/
private void startTransaction() throws IOException {
- if (inTransaction) {
- throw new IOException("transaction is already in process");
+ localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
+ localAutoCommit = autoCommit;
+ if (localAutoCommit) {
+ flushRamSegments();
+ // Turn off auto-commit during our local transaction:
+ autoCommit = false;
}
- rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
- protectedSegments = new HashSet();
- for(int i=0;iautoCommit=false.
+ * @throws IllegalStateException if this is called when
+ * the writer was opened with autoCommit=true.
+ * @throws IOException if there is a low-level IO error
+ */
+ public void abort() throws IOException {
+ if (!autoCommit) {
+ // Keep the same segmentInfos instance but replace all
+ // of its SegmentInfo instances. This is so the next
+ // attempt to commit using this instance of IndexWriter
+ // will always write to a new generation ("write once").
+ segmentInfos.clear();
+ segmentInfos.addAll(rollbackSegmentInfos);
+ // Ask deleter to locate unreferenced files & remove
+ // them:
+ deleter.checkpoint(segmentInfos, false);
+ deleter.refresh();
+ ramSegmentInfos = new SegmentInfos();
+ bufferedDeleteTerms.clear();
+ numBufferedDeleteTerms = 0;
+
+ commitPending = false;
+ close();
+
+ } else {
+ throw new IllegalStateException("abort() can only be called when IndexWriter was opened with autoCommit=false");
+ }
+ }
+
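A sketch of the "all or none" usage that abort() enables with autoCommit=false (editorial illustration, not part of the patch; directory and field values are placeholders):

    import java.io.IOException;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class AllOrNoneExample {
      public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory();
        // autoCommit=false: nothing becomes visible to readers until close().
        IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
        boolean success = false;
        try {
          for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("id", Integer.toString(i), Field.Store.YES,
                              Field.Index.UN_TOKENIZED));
            writer.addDocument(doc);
          }
          success = true;
        } finally {
          if (success) {
            writer.close();   // commit: readers now see all 100 documents
          } else {
            writer.abort();   // roll back: the index is left exactly as it was
          }
        }
        dir.close();
      }
    }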
+ /*
+ * Called whenever the SegmentInfos has been updated and
+ * the index files referenced exist (correctly) in the
+ * index directory. If we are in autoCommit mode, we
+ * commit the change immediately. Else, we mark
+ * commitPending.
+ */
+ private void checkpoint() throws IOException {
+ if (autoCommit) {
+ segmentInfos.write(directory);
+ } else {
+ commitPending = true;
+ }
+ }
+
/** Merges all segments from an array of indexes into this index.
*
* This may be used to parallelize batch indexing. A large document
@@ -1266,16 +1484,13 @@
final String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(this, mergedName);
- final Vector segmentsToDelete = new Vector();
SegmentInfo info;
- String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
IndexReader sReader = null;
try {
if (segmentInfos.size() == 1){ // add existing index, if any
sReader = SegmentReader.get(segmentInfos.info(0));
merger.add(sReader);
- segmentsToDelete.addElement(sReader); // queue segment for deletion
}
for (int i = 0; i < readers.length; i++) // add new indexes
@@ -1288,16 +1503,15 @@
try {
int docCount = merger.merge(); // merge 'em
- segmentInfos.setSize(0); // pop old infos & add new
- info = new SegmentInfo(mergedName, docCount, directory, false, true);
- segmentInfos.addElement(info);
- commitPending = true;
-
if(sReader != null) {
sReader.close();
sReader = null;
}
+ segmentInfos.setSize(0); // pop old infos & add new
+ info = new SegmentInfo(mergedName, docCount, directory, false, true);
+ segmentInfos.addElement(info);
+
success = true;
} finally {
@@ -1312,26 +1526,16 @@
sReader.close();
}
}
+
+ if (useCompoundFile) {
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
-
- if (useCompoundFile) {
boolean success = false;
- segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
- Vector filesToDelete;
-
startTransaction();
try {
-
- filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
-
+ merger.createCompoundFile(mergedName + ".cfs");
info.setUseCompoundFile(true);
- commitPending = true;
- success = true;
-
} finally {
if (!success) {
rollbackTransaction();
@@ -1339,9 +1543,6 @@
commitTransaction();
}
}
-
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteFiles(filesToDelete); // delete now unused files of segment
}
}
@@ -1500,14 +1701,12 @@
final String mergedName = newSegmentName();
SegmentMerger merger = null;
- final Vector segmentsToDelete = new Vector();
+ final List ramSegmentsToDelete = new ArrayList();
- String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
- String nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
SegmentInfo newSegment = null;
int mergedDocCount = 0;
+ boolean anyDeletes = (bufferedDeleteTerms.size() != 0);
// This is try/finally to make sure merger's readers are closed:
try {
@@ -1522,9 +1721,9 @@
infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
IndexReader reader = SegmentReader.get(si); // no need to set deleter (yet)
merger.add(reader);
- if ((reader.directory() == this.directory) || // if we own the directory
- (reader.directory() == this.ramDirectory))
- segmentsToDelete.addElement(reader); // queue segment for deletion
+ if (reader.directory() == this.ramDirectory) {
+ ramSegmentsToDelete.add(si);
+ }
}
}
@@ -1545,9 +1744,8 @@
newSegment = new SegmentInfo(mergedName, mergedDocCount,
directory, false, true);
}
-
- if (!inTransaction
- && (sourceSegments != ramSegmentInfos || bufferedDeleteTerms.size() > 0)) {
+
+ if (sourceSegments != ramSegmentInfos || anyDeletes) {
// Now save the SegmentInfo instances that
// we are replacing:
rollback = (SegmentInfos) segmentInfos.clone();
@@ -1565,19 +1763,12 @@
}
if (sourceSegments == ramSegmentInfos) {
- // Should not be necessary: no prior commit should
- // have left pending files, so just defensive:
- deleter.clearPendingFiles();
maybeApplyDeletes(doMerge);
doAfterFlush();
}
+
+ checkpoint();
- if (!inTransaction) {
- segmentInfos.write(directory); // commit before deleting
- } else {
- commitPending = true;
- }
-
success = true;
} finally {
@@ -1589,11 +1780,10 @@
if (sourceSegments == ramSegmentInfos) {
ramSegmentInfos.removeAllElements();
}
- } else if (!inTransaction) {
+ } else {
// Must rollback so our state matches index:
-
- if (sourceSegments == ramSegmentInfos && 0 == bufferedDeleteTerms.size()) {
+ if (sourceSegments == ramSegmentInfos && !anyDeletes) {
// Simple case: newSegment may or may not have
// been added to the end of our segment infos,
// so just check & remove if so:
@@ -1611,14 +1801,8 @@
segmentInfos.addAll(rollback);
}
- // Erase any pending files that we were going to delete:
- // i.e. old del files added by SegmentReader.doCommit()
- deleter.clearPendingFiles();
-
- // Delete any partially created files:
- deleter.deleteFile(nextSegmentsFileName);
- deleter.findDeletableFiles();
- deleter.deleteFiles();
+ // Delete any partially created and now unreferenced files:
+ deleter.refresh();
}
}
} finally {
@@ -1626,53 +1810,33 @@
if (doMerge) merger.closeReaders();
}
- if (!inTransaction) {
- // Attempt to delete all files we just obsoleted:
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
- // Includes the old del files
- deleter.commitPendingFiles();
- } else {
- deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file
- deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments
- }
+ // Delete the RAM segments
+ deleter.deleteDirect(ramDirectory, ramSegmentsToDelete);
+ // Give deleter a chance to remove files now.
+ deleter.checkpoint(segmentInfos, autoCommit);
+
if (useCompoundFile && doMerge) {
- segmentsInfosFileName = nextSegmentsFileName;
- nextSegmentsFileName = segmentInfos.getNextSegmentFileName();
-
- Vector filesToDelete;
-
boolean success = false;
try {
- filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
+ merger.createCompoundFile(mergedName + ".cfs");
newSegment.setUseCompoundFile(true);
- if (!inTransaction) {
- segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
- }
+ checkpoint();
success = true;
} finally {
- if (!success && !inTransaction) {
+ if (!success) {
// Must rollback:
newSegment.setUseCompoundFile(false);
- deleter.deleteFile(mergedName + ".cfs");
- deleter.deleteFile(nextSegmentsFileName);
+ deleter.refresh();
}
}
-
- if (!inTransaction) {
- deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
- }
-
- // We can delete these segments whether or not we are
- // in a transaction because we had just written them
- // above so they can't need protection by the
- // transaction:
- deleter.deleteFiles(filesToDelete); // delete now-unused segments
+
+ // Give deleter a chance to remove files now.
+ deleter.checkpoint(segmentInfos, autoCommit);
}
return mergedDocCount;
@@ -1692,7 +1856,6 @@
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(segmentInfos.size() - 1));
- reader.setDeleter(deleter);
// Apply delete terms to the segment just flushed from ram
// apply appropriately so that a delete term is only applied to
@@ -1718,7 +1881,6 @@
IndexReader reader = null;
try {
reader = SegmentReader.get(segmentInfos.info(i));
- reader.setDeleter(deleter);
// Apply delete terms to disk segments
// except the one just flushed from ram.
@@ -1769,7 +1931,7 @@
}
// Number of ram segments a delete term applies to.
- private class Num {
+ private static class Num {
private int num;
Num(int num) {
Index: src/java/org/apache/lucene/index/DocumentWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentWriter.java (revision 516341)
+++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy)
@@ -388,6 +388,9 @@
this.infoStream = infoStream;
}
+ int getNumFields() {
+ return fieldInfos.size();
+ }
}
final class Posting { // info about a Term in a doc
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 516341)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy)
@@ -18,286 +18,488 @@
*/
import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.IndexFileNameFilter;
import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.Vector;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collections;
-/**
- * A utility class (used by both IndexReader and
- * IndexWriter) to keep track of files that need to be
- * deleted because they are no longer referenced by the
- * index.
+/*
+ * This class keeps track of each SegmentInfos instance that
+ * is still "live", either because it corresponds to a
+ * segments_N in the Directory (a real commit) or because
+ * it's the in-memory SegmentInfos that a writer is actively
+ * updating but has not yet committed (currently this only
+ * applies when autoCommit=false in IndexWriter). This
+ * class uses simple reference counting to map the live
+ * SegmentInfos instances to individual files in the
+ * Directory.
+ *
+ * A separate deletion policy interface
+ * (IndexDeletionPolicy) is consulted on creation (onInit)
+ * and once per commit (onCommit), to decide when a commit
+ * should be removed.
+ *
+ * The current default deletion policy is {@link
+ * KeepOnlyLastCommitDeletionPolicy}, which removes all
+ * prior commits when a new commit has completed. This
+ * matches the behavior before 2.2.
+ *
+ * Note that you must hold the write.lock before
+ * instantiating this class. It opens segments_N file(s)
+ * directly with no retry logic.
*/
+
final class IndexFileDeleter {
- private Vector deletable;
- private HashSet pending;
+
+ /* Files that we tried to delete but failed (likely
+ * because they are open and we are running on Windows),
+ * so we will retry them again later: */
+ private List deletable;
+
+ /* Reference count for all files in the index. Maps
+ * String to RefCount (class below) instances: */
+ private HashMap refCounts = new HashMap();
+
+ /* Holds all commits (segments_N) currently in the index.
+ * This will have just 1 commit if you are using the
+ * default delete policy (KeepOnlyLastCommitDeletionPolicy).
+ * Other policies may leave commit points live for longer
+ * in which case this list would be longer than 1: */
+ private List commits = new ArrayList();
+
+ /* Holds files we had incref'd from the previous
+ * non-commit checkpoint: */
+ private List lastFiles = new ArrayList();
+
+ private PrintStream infoStream;
+ private List toDelete = new ArrayList();
private Directory directory;
- private SegmentInfos segmentInfos;
- private PrintStream infoStream;
+ private IndexDeletionPolicy policy;
- IndexFileDeleter(SegmentInfos segmentInfos, Directory directory)
- throws IOException {
- this.segmentInfos = segmentInfos;
- this.directory = directory;
- }
- void setSegmentInfos(SegmentInfos segmentInfos) {
- this.segmentInfos = segmentInfos;
- }
- SegmentInfos getSegmentInfos() {
- return segmentInfos;
- }
-
void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
+
+ private void message(String message) {
+ infoStream.println(this + " " + Thread.currentThread().getName() + ": " + message);
+ }
- /** Determine index files that are no longer referenced
- * and therefore should be deleted. This is called once
- * (by the writer), and then subsequently we add onto
- * deletable any files that are no longer needed at the
- * point that we create the unused file (eg when merging
- * segments), and we only remove from deletable when a
- * file is successfully deleted.
+ /**
+ * Initialize the deleter: find all previous commits in
+ * the Directory, incref the files they reference, call
+ * the policy to let it delete commits. The incoming
+ * segmentInfos must have been loaded from a commit point
+ * and not yet modified. This will remove any files not
+ * referenced by any of the commits.
+ * @throws CorruptIndexException if the index is corrupt
+ * @throws IOException if there is a low-level IO error
*/
+ public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream)
+ throws CorruptIndexException, IOException {
- void findDeletableFiles() throws IOException {
+ this.infoStream = infoStream;
+ this.policy = policy;
+ this.directory = directory;
- // Gather all "current" segments:
- HashMap current = new HashMap();
- for(int j=0;j