Index: src/test/org/apache/lucene/store/TestLockFactory.java
===================================================================
--- src/test/org/apache/lucene/store/TestLockFactory.java (revision 468583)
+++ src/test/org/apache/lucene/store/TestLockFactory.java (working copy)
@@ -57,9 +57,9 @@
// Both write lock and commit lock should have been created:
assertEquals("# of unique locks created (after instantiating IndexWriter)",
- 2, lf.locksCreated.size());
- assertTrue("# calls to makeLock <= 2 (after instantiating IndexWriter)",
- lf.makeLockCount > 2);
+ 1, lf.locksCreated.size());
+ assertTrue("# calls to makeLock is 0 (after instantiating IndexWriter)",
+ lf.makeLockCount >= 1);
for(Enumeration e = lf.locksCreated.keys(); e.hasMoreElements();) {
String lockName = (String) e.nextElement();
@@ -89,6 +89,7 @@
try {
writer2 = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
} catch (Exception e) {
+ e.printStackTrace(System.out);
fail("Should not have hit an IOException with no locking");
}
@@ -233,6 +234,7 @@
try {
writer2 = new IndexWriter(indexDirName, new WhitespaceAnalyzer(), false);
} catch (IOException e) {
+ e.printStackTrace(System.out);
fail("Should not have hit an IOException with locking disabled");
}
@@ -265,6 +267,7 @@
try {
fs2 = FSDirectory.getDirectory(indexDirName, true, lf);
} catch (IOException e) {
+ e.printStackTrace(System.out);
fail("Should not have hit an IOException because LockFactory instances are the same");
}
@@ -293,7 +296,6 @@
public void _testStressLocks(LockFactory lockFactory, String indexDirName) throws IOException {
FSDirectory fs1 = FSDirectory.getDirectory(indexDirName, true, lockFactory);
- // fs1.setLockFactory(NoLockFactory.getNoLockFactory());
// First create a 1 doc index:
IndexWriter w = new IndexWriter(fs1, new WhitespaceAnalyzer(), true);
@@ -404,6 +406,7 @@
hitException = true;
System.out.println("Stress Test Index Writer: creation hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
if (writer != null) {
try {
@@ -412,6 +415,7 @@
hitException = true;
System.out.println("Stress Test Index Writer: addDoc hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
try {
writer.close();
@@ -419,6 +423,7 @@
hitException = true;
System.out.println("Stress Test Index Writer: close hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
writer = null;
}
@@ -445,6 +450,7 @@
hitException = true;
System.out.println("Stress Test Index Searcher: create hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
if (searcher != null) {
Hits hits = null;
@@ -454,6 +460,7 @@
hitException = true;
System.out.println("Stress Test Index Searcher: search hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
// System.out.println(hits.length() + " total results");
try {
@@ -462,6 +469,7 @@
hitException = true;
System.out.println("Stress Test Index Searcher: close hit unexpected exception: " + e.toString());
e.printStackTrace(System.out);
+ break;
}
searcher = null;
}
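The stress tests above depend on two FSDirectory handles contending for the same underlying lock. A minimal sketch of that setup outside the test harness (the path and the choice of SingleInstanceLockFactory are illustrative assumptions, not taken from this patch):

    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.store.LockFactory;
    import org.apache.lucene.store.SingleInstanceLockFactory;

    public class SharedLockFactoryDemo {
      public static void main(String[] args) throws Exception {
        // One LockFactory shared by both Directory handles means the
        // write.lock obtained through one blocks the other:
        LockFactory lf = new SingleInstanceLockFactory();
        Directory d1 = FSDirectory.getDirectory("/tmp/lucene.demo", true, lf);
        Directory d2 = FSDirectory.getDirectory("/tmp/lucene.demo", false, lf);
        System.out.println(d1.makeLock("write.lock").obtain()); // true
        System.out.println(d2.makeLock("write.lock").obtain()); // false: already held
      }
    }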
Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 468583)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -1,6 +1,7 @@
package org.apache.lucene.index;
import java.io.IOException;
+import java.io.File;
import junit.framework.TestCase;
@@ -10,7 +11,10 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
/**
@@ -28,14 +32,11 @@
int i;
IndexWriter.setDefaultWriteLockTimeout(2000);
- IndexWriter.setDefaultCommitLockTimeout(2000);
assertEquals(2000, IndexWriter.getDefaultWriteLockTimeout());
- assertEquals(2000, IndexWriter.getDefaultCommitLockTimeout());
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
IndexWriter.setDefaultWriteLockTimeout(1000);
- IndexWriter.setDefaultCommitLockTimeout(1000);
// add 100 documents
for (i = 0; i < 100; i++) {
@@ -72,6 +73,12 @@
assertEquals(60, reader.maxDoc());
assertEquals(60, reader.numDocs());
reader.close();
+
+ // make sure opening a new index for create over
+ // this existing one works correctly:
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ assertEquals(0, writer.docCount());
+ writer.close();
}
private void addDoc(IndexWriter writer) throws IOException
@@ -80,4 +87,192 @@
doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(doc);
}
+
+ // Make sure we can open an index for create even when a
+ // reader holds it open (this fails, pre lock-less
+ // commits, on Windows):
+ public void testCreateWithReader() throws IOException {
+ String tempDir = System.getProperty("java.io.tmpdir");
+ if (tempDir == null)
+ throw new IOException("java.io.tmpdir undefined, cannot run test");
+ File indexDir = new File(tempDir, "lucenetestindexwriter");
+ Directory dir = FSDirectory.getDirectory(indexDir, true);
+
+ // add one document & close writer
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ addDoc(writer);
+ writer.close();
+
+ // now open reader:
+ IndexReader reader = IndexReader.open(dir);
+ assertEquals("should be one document", reader.numDocs(), 1);
+
+ // now open index for create:
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ assertEquals("should be zero documents", writer.docCount(), 0);
+ addDoc(writer);
+ writer.close();
+
+ assertEquals("should be one document", reader.numDocs(), 1);
+ IndexReader reader2 = IndexReader.open(dir);
+ assertEquals("should be one document", reader2.numDocs(), 1);
+ reader.close();
+ reader2.close();
+ rmDir(indexDir);
+ }
+
+ // Simulate a writer that crashed while writing segments
+ // file: make sure we can still open the index (ie,
+ // gracefully fallback to the previous segments file),
+ // and that we can add to the index:
+ public void testSimulatedCrashedWriter() throws IOException {
+ Directory dir = new RAMDirectory();
+
+ IndexWriter writer = null;
+
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+
+ // add 100 documents
+ for (int i = 0; i < 100; i++) {
+ addDoc(writer);
+ }
+
+ // close
+ writer.close();
+
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+ assertTrue("segment generation should be > 1 but got " + gen, gen > 1);
+
+ // Make the next segments file, with last byte
+ // missing, to simulate a writer that crashed while
+ // writing segments file:
+ String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir);
+ String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ 1+gen);
+ IndexInput in = dir.openInput(fileNameIn);
+ IndexOutput out = dir.createOutput(fileNameOut);
+ long length = in.length();
+ for(int i=0;i<length-1;i++) {
+ out.writeByte(in.readByte());
+ }
+ in.close();
+ out.close();
+
+ IndexReader reader = null;
+ try {
+ reader = IndexReader.open(dir);
+ } catch (Exception e) {
+ fail("reader failed to open on a crashed index");
+ }
+ reader.close();
+
+ try {
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ } catch (Exception e) {
+ fail("writer failed to open on a crashed index");
+ }
+
+ // add 100 documents
+ for (int i = 0; i < 100; i++) {
+ addDoc(writer);
+ }
+
+ // close
+ writer.close();
+ }
+
+ // Simulate a corrupt index by removing the last byte of
+ // the latest segments file and make sure we get an
+ // IOException trying to open the index:
+ public void testSimulatedCorruptIndex1() throws IOException {
+ Directory dir = new RAMDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+
+ // add 100 documents
+ for (int i = 0; i < 100; i++) {
+ addDoc(writer);
+ }
+
+ // close
+ writer.close();
+
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+ assertTrue("segment generation should be > 1 but got " + gen, gen > 1);
+
+ String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir);
+ String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ 1+gen);
+ IndexInput in = dir.openInput(fileNameIn);
+ IndexOutput out = dir.createOutput(fileNameOut);
+ long length = in.length();
+ for(int i=0;i<length-1;i++) {
+ out.writeByte(in.readByte());
+ }
+ in.close();
+ out.close();
+ dir.deleteFile(fileNameIn);
+
+ IndexReader reader = null;
+ try {
+ reader = IndexReader.open(dir);
+ fail("reader did not hit IOException on opening a corrupt index");
+ } catch (Exception e) {
+ // expected
+ }
+ if (reader != null) {
+ reader.close();
+ }
+ }
+
+ // Simulate a corrupt index by removing one of the
+ // compound (.cfs) files and make sure we get an
+ // IOException trying to open the index:
+ public void testSimulatedCorruptIndex2() throws IOException {
+ Directory dir = new RAMDirectory();
+
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+
+ // add 100 documents
+ for (int i = 0; i < 100; i++) {
+ addDoc(writer);
+ }
+
+ // close
+ writer.close();
+
+ long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
+ assertTrue("segment generation should be > 1 but got " + gen, gen > 1);
+
+ String[] files = dir.list();
+ for(int i=0;i<files.length;i++) {
+ if (files[i].endsWith(".cfs")) {
+ dir.deleteFile(files[i]);
+ break;
+ }
+ }
+
+ IndexReader reader = null;
+ try {
+ reader = IndexReader.open(dir);
+ fail("reader did not hit IOException on opening a corrupt index");
+ } catch (Exception e) {
+ // expected
+ }
+ if (reader != null) {
+ reader.close();
+ }
+ }
+
+ private void rmDir(File dir) {
+ File[] files = dir.listFiles();
+ if (files != null) {
+ for (int i = 0; i < files.length; i++) {
+ files[i].delete();
+ }
+ }
+ dir.delete();
+ }
}
Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
===================================================================
--- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 0)
+++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 0)
+package org.apache.lucene.index;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.BufferedOutputStream;
+import java.io.FileOutputStream;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import junit.framework.TestCase;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+/* Verify we can read the pre-lockless file format, do
+   searches against it, and add documents to it: */
+public class TestBackwardsCompatibility extends TestCase {
+
+ /* Unzips dirName + ".zip" -> dirName, removing dirName
+ first */
+ public void unzip(String dirName) throws IOException {
+ rmDir(dirName);
+
+ Enumeration entries;
+ ZipFile zipFile;
+ zipFile = new ZipFile(dirName + ".zip");
+
+ entries = zipFile.entries();
+ File fileDir = new File(dirName);
+ fileDir.mkdir();
+
+ while (entries.hasMoreElements()) {
+ ZipEntry entry = (ZipEntry) entries.nextElement();
+
+ InputStream in = zipFile.getInputStream(entry);
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(fileDir, entry.getName())));
+
+ byte[] buffer = new byte[8192];
+ int len;
+ while((len = in.read(buffer)) >= 0) {
+ out.write(buffer, 0, len);
+ }
+
+ in.close();
+ out.close();
+ }
+
+ zipFile.close();
+ }
+
+ public void testCreateCFS() throws IOException {
+ createIndex("testindex.cfs", true);
+ }
+
+ public void testCreateNoCFS() throws IOException {
+ createIndex("testindex.nocfs", false);
+ }
+
+ public void testSearchOldIndexCFS() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
+ unzip(dirName);
+ searchIndex(dirName);
+ rmDir(dirName);
+ }
+
+ public void testIndexOldIndexCFSNoAdds() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
+ unzip(dirName);
+ changeIndexNoAdds(dirName);
+ rmDir(dirName);
+ }
+
+ public void testIndexOldIndexCFS() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs";
+ unzip(dirName);
+ changeIndexWithAdds(dirName);
+ rmDir(dirName);
+ }
+
+ public void testSearchOldIndexNoCFS() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
+ unzip(dirName);
+ searchIndex(dirName);
+ rmDir(dirName);
+ }
+
+ public void testIndexOldIndexNoCFS() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
+ unzip(dirName);
+ changeIndexWithAdds(dirName);
+ rmDir(dirName);
+ }
+
+ public void testIndexOldIndexNoCFSNoAdds() throws IOException {
+ String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs";
+ unzip(dirName);
+ changeIndexNoAdds(dirName);
+ rmDir(dirName);
+ }
+
+ public void searchIndex(String dirName) throws IOException {
+ //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer());
+ //Query query = parser.parse("handle:1");
+
+ Directory dir = FSDirectory.getDirectory(dirName, false);
+ IndexSearcher searcher = new IndexSearcher(dir);
+
+ Hits hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals(34, hits.length());
+ Document d = hits.doc(0);
+
+ // First document should be #21 since its norm was increased:
+ assertEquals("didn't get the right document first", "21", d.get("id"));
+
+ searcher.close();
+ dir.close();
+ }
+
+ /* Open pre-lockless index, add docs, do a delete &
+ * setNorm, and search */
+ public void changeIndexWithAdds(String dirName) throws IOException {
+
+ Directory dir = FSDirectory.getDirectory(dirName, false);
+ // open writer
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+
+ // add 10 docs
+ for(int i=0;i<10;i++) {
+ addDoc(writer, 35+i);
+ }
+
+ // make sure writer sees the right total; note that docCount() still counts docs marked deleted in the .del file:
+ assertEquals("wrong doc count", 45, writer.docCount());
+ writer.close();
+
+ // make sure searching sees right # hits
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 44, hits.length());
+ Document d = hits.doc(0);
+ assertEquals("wrong first document", "21", d.get("id"));
+ searcher.close();
+
+ // make sure we can do another delete & another setNorm against this
+ // pre-lockless segment:
+ IndexReader reader = IndexReader.open(dir);
+ Term searchTerm = new Term("id", "6");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("wrong delete count", 1, delCount);
+ reader.setNorm(22, "content", (float) 2.0);
+ reader.close();
+
+ // make sure 2nd delete & 2nd norm "took":
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 43, hits.length());
+ d = hits.doc(0);
+ assertEquals("wrong first document", "22", d.get("id"));
+ searcher.close();
+
+ // optimize
+ writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ writer.optimize();
+ writer.close();
+
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 43, hits.length());
+ d = hits.doc(0);
+ assertEquals("wrong first document", "22", d.get("id"));
+ searcher.close();
+
+ dir.close();
+ }
+
+ /* Open pre-lockless index, add docs, do a delete &
+ * setNorm, and search */
+ public void changeIndexNoAdds(String dirName) throws IOException {
+
+ Directory dir = FSDirectory.getDirectory(dirName, false);
+
+ // make sure searching sees right # hits
+ IndexSearcher searcher = new IndexSearcher(dir);
+ Hits hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 34, hits.length());
+ Document d = hits.doc(0);
+ assertEquals("wrong first document", "21", d.get("id"));
+ searcher.close();
+
+ // make sure we can do another delete & another setNorm against this
+ // pre-lockless segment:
+ IndexReader reader = IndexReader.open(dir);
+ Term searchTerm = new Term("id", "6");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("wrong delete count", 1, delCount);
+ reader.setNorm(22, "content", (float) 2.0);
+ reader.close();
+
+ // make sure 2nd delete & 2nd norm "took":
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 33, hits.length());
+ d = hits.doc(0);
+ assertEquals("wrong first document", "22", d.get("id"));
+ searcher.close();
+
+ // optimize
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
+ writer.optimize();
+ writer.close();
+
+ searcher = new IndexSearcher(dir);
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")));
+ assertEquals("wrong number of hits", 33, hits.length());
+ d = hits.doc(0);
+ assertEquals("wrong first document", "22", d.get("id"));
+ searcher.close();
+
+ dir.close();
+ }
+
+ public void createIndex(String dirName, boolean doCFS) throws IOException {
+
+ Directory dir = FSDirectory.getDirectory(dirName, true);
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ writer.setUseCompoundFile(doCFS);
+
+ for(int i=0;i<35;i++) {
+ addDoc(writer, i);
+ }
+ assertEquals("wrong doc count", 35, writer.docCount());
+ writer.close();
+
+ // Delete one doc so we get a .del file:
+ IndexReader reader = IndexReader.open(dir);
+ Term searchTerm = new Term("id", "7");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("didn't delete the right number of documents", 1, delCount);
+
+ // Set one norm so we get a .s0 file:
+ reader.setNorm(21, "content", (float) 1.5);
+ reader.close();
+
+ rmDir(dirName);
+ }
+
+ /* Verifies that the expected file names were produced */
+
+ public void testExactFileNames() throws IOException {
+
+ String outputDir = "lucene.backwardscompat0.index";
+ Directory dir = FSDirectory.getDirectory(outputDir, true);
+ IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
+ for(int i=0;i<35;i++) {
+ addDoc(writer, i);
+ }
+ assertEquals("wrong doc count", 35, writer.docCount());
+ writer.close();
+
+ // Delete one doc so we get a .del file:
+ IndexReader reader = IndexReader.open(dir);
+ Term searchTerm = new Term("id", "7");
+ int delCount = reader.deleteDocuments(searchTerm);
+ assertEquals("didn't delete the right number of documents", 1, delCount);
+
+ // Set one norm so we get a .s0 file:
+ reader.setNorm(21, "content", (float) 1.5);
+ reader.close();
+
+ // Now verify file names:
+ String[] expected = {"_0.cfs",
+ "_0_1.del",
+ "_1.cfs",
+ "_2.cfs",
+ "_2_1.s0",
+ "_3.cfs",
+ "segments_a",
+ "segments.gen"};
+
+ String[] actual = dir.list();
+ Arrays.sort(expected);
+ Arrays.sort(actual);
+ if (!Arrays.equals(expected, actual)) {
+ fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual));
+ }
+
+ rmDir(outputDir);
+ }
+
+ private String asString(String[] l) {
+ String s = "";
+ for(int i=0;i<l.length;i++) {
+ if (i > 0) {
+ s += "\n ";
+ }
+ s += l[i];
+ }
+ return s;
+ }
+
+ private void addDoc(IndexWriter writer, int id) throws IOException
+ {
+ Document doc = new Document();
+ doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED));
+ doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ writer.addDocument(doc);
+ }
+
+ private void rmDir(String dir) {
+ File fileDir = new File(dir);
+ if (fileDir.exists()) {
+ File[] files = fileDir.listFiles();
+ if (files != null) {
+ for (int i = 0; i < files.length; i++) {
+ files[i].delete();
+ }
+ }
+ fileDir.delete();
+ }
+ }
+}
\ No newline at end of file
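For reference, the "segments_a" entry expected by testExactFileNames is just the base-36 generation encoding at work; a one-line check (illustrative, not part of the patch):

    // Generation 10 renders as "a" in base 36, so the index's tenth
    // commit point is named segments_a:
    System.out.println(Long.toString(10, Character.MAX_RADIX)); // prints "a"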
Index: src/java/org/apache/lucene/index/MultiReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiReader.java (revision 468583)
+++ src/java/org/apache/lucene/index/MultiReader.java (working copy)
@@ -217,6 +217,13 @@
return new MultiTermPositions(subReaders, starts);
}
+ protected void setDeleter(IndexFileDeleter deleter) {
+ // Share deleter to our SegmentReaders:
+ this.deleter = deleter;
+ for (int i = 0; i < subReaders.length; i++)
+ subReaders[i].setDeleter(deleter);
+ }
+
protected void doCommit() throws IOException {
for (int i = 0; i < subReaders.length; i++)
subReaders[i].commit();
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 468583)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -112,6 +112,7 @@
private Directory directory;
private boolean directoryOwner;
private boolean closeDirectory;
+ protected IndexFileDeleter deleter;
private SegmentInfos segmentInfos;
private Lock writeLock;
@@ -137,24 +138,40 @@
}
private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException {
- synchronized (directory) { // in- & inter-process sync
- return (IndexReader)new Lock.With(
- directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
- IndexWriter.COMMIT_LOCK_TIMEOUT) {
- public Object doBody() throws IOException {
- SegmentInfos infos = new SegmentInfos();
- infos.read(directory);
- if (infos.size() == 1) { // index is optimized
- return SegmentReader.get(infos, infos.info(0), closeDirectory);
+
+ return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) {
+
+ public Object doBody(String segmentFileName) throws IOException {
+
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(directory, segmentFileName);
+
+ if (infos.size() == 1) { // index is optimized
+ return SegmentReader.get(infos, infos.info(0), closeDirectory);
+ } else {
+
+ // To reduce the chance of hitting FileNotFound
+ // (and having to retry), we open segments in
+ // reverse because IndexWriter merges & deletes
+ // the newest segments first.
+
+ IndexReader[] readers = new IndexReader[infos.size()];
+ for (int i = infos.size()-1; i >= 0; i--) {
+ try {
+ readers[i] = SegmentReader.get(infos.info(i));
+ } catch (IOException e) {
+ // Close all readers we had opened:
+ for(i++;i<infos.size();i++) {
+ readers[i].close();
+ }
+ throw e;
+ }
+ }
+
+ return new MultiReader(directory, infos, closeDirectory, readers);
+ }
+ }
+ }.run();
+ }
* @return true if an index exists; false otherwise
*/
public static boolean indexExists(String directory) {
- return (new File(directory, IndexFileNames.SEGMENTS)).exists();
+ return indexExists(new File(directory));
}
/**
@@ -327,8 +326,9 @@
* @param directory the directory to check for an index
* @return true if an index exists; false otherwise
*/
+
public static boolean indexExists(File directory) {
- return (new File(directory, IndexFileNames.SEGMENTS)).exists();
+ return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1;
}
/**
@@ -339,7 +339,7 @@
* @throws IOException if there is a problem with accessing the index
*/
public static boolean indexExists(Directory directory) throws IOException {
- return directory.fileExists(IndexFileNames.SEGMENTS);
+ return SegmentInfos.getCurrentSegmentGeneration(directory) != -1;
}
/** Returns the number of documents in this index. */
@@ -591,17 +591,22 @@
*/
protected final synchronized void commit() throws IOException{
if(hasChanges){
+ if (deleter == null) {
+ // In the MultiReader case, we share this deleter
+ // across all SegmentReaders:
+ setDeleter(new IndexFileDeleter(segmentInfos, directory));
+ deleter.deleteFiles();
+ }
if(directoryOwner){
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
- IndexWriter.COMMIT_LOCK_TIMEOUT) {
- public Object doBody() throws IOException {
- doCommit();
- segmentInfos.write(directory);
- return null;
- }
- }.run();
- }
+ deleter.clearPendingFiles();
+ doCommit();
+ String oldInfoFileName = segmentInfos.getCurrentSegmentFileName();
+ segmentInfos.write(directory);
+ // Attempt to delete all files we just obsoleted:
+
+ deleter.deleteFile(oldInfoFileName);
+ deleter.commitPendingFiles();
+ deleter.deleteFiles();
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
@@ -613,6 +618,13 @@
hasChanges = false;
}
+ protected void setDeleter(IndexFileDeleter deleter) {
+ this.deleter = deleter;
+ }
+ protected IndexFileDeleter getDeleter() {
+ return deleter;
+ }
+
/** Implements commit. */
protected abstract void doCommit() throws IOException;
@@ -657,8 +669,7 @@
*/
public static boolean isLocked(Directory directory) throws IOException {
return
- directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() ||
- directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked();
+ directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked();
}
/**
@@ -683,7 +694,6 @@
*/
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
- directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
}
/**
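With the commit lock gone, a reader that has buffered deletes or norm updates commits them through the same segments_N mechanism the writer uses. A hedged sketch of the caller-visible flow (the index path is a hypothetical placeholder; Term is org.apache.lucene.index.Term):

    // close() triggers the rewritten commit() above: doCommit() flushes
    // the new .del/.sN files, segmentInfos.write(directory) creates the
    // next segments_N, and the old segments_N goes to the deleter.
    Directory dir = FSDirectory.getDirectory("/tmp/lucene.demo", false);
    IndexReader reader = IndexReader.open(dir);
    reader.deleteDocuments(new Term("id", "42"));
    reader.close();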
Index: src/java/org/apache/lucene/index/IndexFileNames.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileNames.java (revision 468583)
+++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy)
@@ -26,19 +26,25 @@
/** Name of the index segment file */
static final String SEGMENTS = "segments";
+
+ /** Name of the generation reference file */
+ static final String SEGMENTS_GEN = "segments.gen";
- /** Name of the index deletable file */
+ /** Name of the index deletable file (only used in
+ * pre-lockless indices) */
static final String DELETABLE = "deletable";
-
+
/**
- * This array contains all filename extensions used by Lucene's index files, with
- * one exception, namely the extension made up from .f + a number.
- * Also note that two of Lucene's files (deletable and
- * segments) don't have any filename extension.
+ * This array contains all filename extensions used by
+ * Lucene's index files, with two exceptions, namely the
+ * extension made up from .f + a number and
+ * from .s + a number. Also note that
+ * Lucene's segments_N files do not have any
+ * filename extension.
*/
static final String INDEX_EXTENSIONS[] = new String[] {
"cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
- "tvx", "tvd", "tvf", "tvp" };
+ "tvx", "tvd", "tvf", "tvp", "gen"};
/** File extensions of old-style index files */
static final String COMPOUND_EXTENSIONS[] = new String[] {
@@ -49,5 +55,24 @@
static final String VECTOR_EXTENSIONS[] = new String[] {
"tvx", "tvd", "tvf"
};
-
+
+ /**
+ * Computes the full file name from base, extension and
+ * generation. If the generation is -1, the file name is
+ * null. If it's 0, the file name is <base><extension>.
+ * If it's > 0, the file name is <base>_<gen><extension>.
+ *
+ * @param base -- main part of the file name
+ * @param extension -- extension of the filename (including .)
+ * @param gen -- generation
+ */
+ public static final String fileNameFromGeneration(String base, String extension, long gen) {
+ if (gen == -1) {
+ return null;
+ } else if (gen == 0) {
+ return base + extension;
+ } else {
+ return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension;
+ }
+ }
}
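To make the generation-to-name mapping concrete, here is a small self-contained sketch that restates fileNameFromGeneration's logic (IndexFileNames itself is package-private, so the method is inlined here):

    public class FileNameFromGenerationDemo {
      static String fileNameFromGeneration(String base, String extension, long gen) {
        if (gen == -1) return null;            // no file for this generation
        if (gen == 0) return base + extension; // pre-lockless name
        return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension;
      }
      public static void main(String[] args) {
        System.out.println(fileNameFromGeneration("segments", "", 4));  // segments_4
        System.out.println(fileNameFromGeneration("segments", "", 36)); // segments_10 (base 36)
        System.out.println(fileNameFromGeneration("_7", ".del", 2));    // _7_2.del
      }
    }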
Index: src/java/org/apache/lucene/index/SegmentInfos.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfos.java (revision 468583)
+++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy)
@@ -18,6 +18,9 @@
import java.util.Vector;
import java.io.IOException;
+import java.io.PrintStream;
+import java.io.File;
+import java.io.FileNotFoundException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -28,26 +31,138 @@
/** The file format version, a negative number. */
/* Works since counter, the old 1st entry, is always >= 0 */
public static final int FORMAT = -1;
-
+
+ /** This is the current file format written. It differs
+ * slightly from the previous format in that file names
+ * are never re-used (write once). Instead, each file is
+ * written to the next generation. For example,
+ * segments_1, segments_2, etc. This allows us to not use
+ * a commit lock. See file
+ * formats for details.
+ */
+ public static final int FORMAT_LOCKLESS = -2;
+
public int counter = 0; // used to name new segments
/**
* counts how often the index has been changed by adding or deleting docs.
* starting with the current time in milliseconds forces to create unique version numbers.
*/
private long version = System.currentTimeMillis();
+ private long generation = 0; // generation of the "segments_N" file we read
+ /**
+ * If non-null, information about loading segments_N files
+ * will be printed here. @see #setInfoStream.
+ */
+ private static PrintStream infoStream;
+
public final SegmentInfo info(int i) {
return (SegmentInfo) elementAt(i);
}
- public final void read(Directory directory) throws IOException {
-
- IndexInput input = directory.openInput(IndexFileNames.SEGMENTS);
+ /**
+ * Get the generation (N) of the current segments_N file
+ * from a list of files.
+ *
+ * @param files -- array of file names to check
+ */
+ public static long getCurrentSegmentGeneration(String[] files) {
+ if (files == null) {
+ return -1;
+ }
+ long max = -1;
+ int prefixLen = IndexFileNames.SEGMENTS.length()+1;
+ for (int i = 0; i < files.length; i++) {
+ String file = files[i];
+ if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
+ if (file.equals(IndexFileNames.SEGMENTS)) {
+ // Pre lock-less commits:
+ if (max == -1) {
+ max = 0;
+ }
+ } else {
+ long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX);
+ if (v > max) {
+ max = v;
+ }
+ }
+ }
+ }
+ return max;
+ }
+
+ /**
+ * Get the generation (N) of the current segments_N file
+ * in the directory.
+ *
+ * @param directory -- directory to search for the latest segments_N file
+ */
+ public static long getCurrentSegmentGeneration(Directory directory) throws IOException {
+ String[] files = directory.list();
+ if (files == null)
+ throw new IOException("Cannot read directory " + directory);
+ return getCurrentSegmentGeneration(files);
+ }
+
+ /**
+ * Get the filename of the current segments_N file
+ * from a list of files.
+ *
+ * @param files -- array of file names to check
+ */
+
+ public static String getCurrentSegmentFileName(String[] files) throws IOException {
+ return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ getCurrentSegmentGeneration(files));
+ }
+
+ /**
+ * Get the filename of the current segments_N file
+ * in the directory.
+ *
+ * @param directory -- directory to search for the latest segments_N file
+ */
+ public static String getCurrentSegmentFileName(Directory directory) throws IOException {
+ return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ getCurrentSegmentGeneration(directory));
+ }
+
+ /**
+ * Get the segment_N filename in use by this segment infos.
+ */
+ public String getCurrentSegmentFileName() {
+ return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ generation);
+ }
+
+ /**
+ * Read a particular segmentFileName. Note that this may
+ * throw an IOException if a commit is in process.
+ *
+ * @param directory -- directory containing the segments file
+ * @param segmentFileName -- segment file to load
+ */
+ public final void read(Directory directory, String segmentFileName) throws IOException {
+ boolean success = false;
+
+ IndexInput input = directory.openInput(segmentFileName);
+
+ if (segmentFileName.equals(IndexFileNames.SEGMENTS)) {
+ generation = 0;
+ } else {
+ generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()),
+ Character.MAX_RADIX);
+ }
+
try {
int format = input.readInt();
if(format < 0){ // file contains explicit format info
// check that it is a format we can understand
- if (format < FORMAT)
+ if (format < FORMAT_LOCKLESS)
throw new IOException("Unknown format version: " + format);
version = input.readLong(); // read version
counter = input.readInt(); // read counter
@@ -57,9 +172,7 @@
}
for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
- SegmentInfo si =
- new SegmentInfo(input.readString(), input.readInt(), directory);
- addElement(si);
+ addElement(new SegmentInfo(directory, format, input));
}
if(format >= 0){ // in old format the version number may be at the end of the file
@@ -68,31 +181,70 @@
else
version = input.readLong(); // read version
}
+ success = true;
}
finally {
input.close();
+ if (!success) {
+ // Clear any segment infos we had loaded so we
+ // have a clean slate on retry:
+ clear();
+ }
}
}
+ /**
+ * This version of read uses the retry logic (for lock-less
+ * commits) to find the right segments file to load.
+ */
+ public final void read(Directory directory) throws IOException {
+ generation = -1;
+
+ new FindSegmentsFile(directory) {
+
+ public Object doBody(String segmentFileName) throws IOException {
+ read(directory, segmentFileName);
+ return null;
+ }
+ }.run();
+ }
+
public final void write(Directory directory) throws IOException {
- IndexOutput output = directory.createOutput("segments.new");
+
+ // Always advance the generation on write:
+ if (generation == -1) {
+ generation = 1;
+ } else {
+ generation++;
+ }
+
+ String segmentFileName = getCurrentSegmentFileName();
+ IndexOutput output = directory.createOutput(segmentFileName);
+
try {
- output.writeInt(FORMAT); // write FORMAT
- output.writeLong(++version); // every write changes the index
+ output.writeInt(FORMAT_LOCKLESS); // write FORMAT
+ output.writeLong(++version); // every write changes
+ // the index
output.writeInt(counter); // write counter
output.writeInt(size()); // write infos
for (int i = 0; i < size(); i++) {
SegmentInfo si = info(i);
- output.writeString(si.name);
- output.writeInt(si.docCount);
+ si.write(output);
}
}
finally {
output.close();
}
- // install new segment info
- directory.renameFile("segments.new", IndexFileNames.SEGMENTS);
+ try {
+ output = directory.createOutput(IndexFileNames.SEGMENTS_GEN);
+ output.writeLong(generation);
+ output.writeLong(generation);
+ output.close();
+ } catch (IOException e) {
+ // It's OK if we fail to write this file since it's
+ // used only as one of the retry fallbacks.
+ }
}
/**
@@ -107,30 +259,266 @@
*/
public static long readCurrentVersion(Directory directory)
throws IOException {
+
+ return ((Long) new FindSegmentsFile(directory) {
+ public Object doBody(String segmentFileName) throws IOException {
+
+ IndexInput input = directory.openInput(segmentFileName);
+
+ int format = 0;
+ long version = 0;
+ try {
+ format = input.readInt();
+ if(format < 0){
+ if (format < FORMAT_LOCKLESS)
+ throw new IOException("Unknown format version: " + format);
+ version = input.readLong(); // read version
+ }
+ }
+ finally {
+ input.close();
+ }
+
+ if(format < 0)
+ return new Long(version);
+
+ // We cannot be sure about the format of the file.
+ // Therefore we have to read the whole file and cannot simply seek to the version entry.
+ SegmentInfos sis = new SegmentInfos();
+ sis.read(directory, segmentFileName);
+ return new Long(sis.getVersion());
+ }
+ }.run()).longValue();
+ }
+
+ /** If non-null, information about retries when loading
+ * the segments file will be printed to this:
+ */
+ public static void setInfoStream(PrintStream infoStream) {
+ SegmentInfos.infoStream = infoStream;
+ }
+
+ /**
+ * @see #setInfoStream
+ */
+ public static PrintStream getInfoStream() {
+ return infoStream;
+ }
+
+ private static void message(String message) {
+ if (infoStream != null) {
+ infoStream.println(Thread.currentThread().getName() + ": " + message);
+ }
+ }
+
+ /**
+ * Utility class for executing code that needs to do
+ * something with the current segments file. This is
+ * necessary with lock-less commits because from the time
+ * you locate the current segments file name, until you
+ * actually open it, read its contents, or check modified
+ * time, etc., it could have been deleted due to a writer
+ * commit finishing.
+ */
+ public abstract static class FindSegmentsFile {
+
+ File fileDirectory;
+ Directory directory;
+
+ public FindSegmentsFile(File directory) {
+ this.fileDirectory = directory;
+ }
+
+ public FindSegmentsFile(Directory directory) {
+ this.directory = directory;
+ }
+
+ public Object run() throws IOException {
+ String segmentFileName = null;
+ long lastGen = -1;
+ long gen = 0;
+ int genLookaheadCount = 0;
+ IOException exc = null;
+ boolean retry = false;
+
+ int method = 0;
+
+ // Loop until we succeed in calling doBody() without
+ // hitting an IOException. An IOException most likely
+ // means a commit was in process and has finished, in
+ // the time it took us to load the now-old infos files
+ // (and segments files). It's also possible it's a
+ // true error (corrupt index). To distinguish these,
+ // on each retry we must see "forward progress" on
+ // which generation we are trying to load. If we
+ // don't, then the original error is real and we throw
+ // it.
- IndexInput input = directory.openInput(IndexFileNames.SEGMENTS);
- int format = 0;
- long version = 0;
- try {
- format = input.readInt();
- if(format < 0){
- if (format < FORMAT)
- throw new IOException("Unknown format version: " + format);
- version = input.readLong(); // read version
+ // We have three methods for determining the current
+ // generation. We try each in sequence.
+
+ while(true) {
+
+ // Method 1: list the directory and use the highest
+ // segments_N file. This method works well as long
+ // as there is no stale caching on the directory
+ // contents:
+ String[] files = null;
+
+ if (0 == method) {
+ if (directory != null) {
+ files = directory.list();
+ } else {
+ files = fileDirectory.list();
+ }
+
+ gen = getCurrentSegmentGeneration(files);
+
+ if (gen == -1) {
+ String s = "";
+ for(int i=0;i<files.length;i++) {
+ s += " " + files[i];
+ }
+ throw new FileNotFoundException("no segments* file found: files:" + s);
+ }
+ }
+
+ // Method 2 (fallback if Method 1 is not reliable):
+ // if the directory listing appears stale, try
+ // loading the "segments.gen" file:
+ if (1 == method || (0 == method && lastGen == gen && retry)) {
+
+ method = 1;
+
+ // LOCKLESS TODO: make this retry count configurable/settable
+ for(int i=0;i<10;i++) {
+ IndexInput genInput = null;
+ try {
+ genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN);
+ } catch (IOException e) {
+ // will retry
+ }
+ if (genInput != null) {
+ try {
+ long gen0 = genInput.readLong();
+ long gen1 = genInput.readLong();
+ if (gen0 == gen1) {
+ // The file is consistent; use this generation if it's larger:
+ if (gen0 > gen) {
+ message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen);
+ gen = gen0;
+ }
+ break;
+ }
+ } catch (IOException err2) {
+ // will retry
+ } finally {
+ genInput.close();
+ }
+ }
+ try {
+ // LOCKLESS TODO: make this 50 msec configurable/settable
+ Thread.sleep(50);
+ } catch (InterruptedException e) {
+ // will retry
+ }
+ }
+ }
+
+ // Method 3 (fallback if Methods 2 & 3 are not
+ // reliabel): since both directory cache and file
+ // contents cache seem to be stale, just advance the
+ // generation.
+ if (2 == method || (1 == method && lastGen == gen && retry)) {
+
+ method = 2;
+
+ // LOCKLESS TODO: make this 10 max lookahead configurable/settable
+ if (genLookaheadCount < 10) {
+ gen++;
+ genLookaheadCount++;
+ message("look ahead incremenent gen to " + gen);
+ }
+ }
+
+ if (lastGen == gen) {
+
+ // This means we're about to try the same
+ // segments_N last tried. This is allowed,
+ // exactly once, because writer could have been in
+ // the process of writing segments_N last time.
+
+ if (retry) {
+ // OK, we've tried the same segments_N file
+ // twice in a row, so this must be a real
+ // error. We throw the original exception we
+ // got.
+ throw exc;
+ } else {
+ retry = true;
+ }
+
+ } else {
+ // Segment file has advanced since our last loop, so
+ // reset retry:
+ retry = false;
+ }
+
+ lastGen = gen;
+
+ segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ gen);
+
+ try {
+ Object v = doBody(segmentFileName);
+ if (exc != null) {
+ message("success on " + segmentFileName);
+ }
+ return v;
+ } catch (IOException err) {
+
+ // Save the original root cause:
+ if (exc == null) {
+ exc = err;
+ }
+
+ message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen);
+
+ if (!retry && gen > 1) {
+
+ // This is our first time trying this segments
+ // file (because retry is false), and, there is
+ // possibly a segments_(N-1) (because gen > 1).
+ // So, check if the segments_(N-1) exists and
+ // try it if so:
+ String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
+ "",
+ gen-1);
+
+ if (directory.fileExists(prevSegmentFileName)) {
+ message("fallback to prior segment file '" + prevSegmentFileName + "'");
+ try {
+ Object v = doBody(prevSegmentFileName);
+ if (exc != null) {
+ message("success on fallback " + prevSegmentFileName);
+ }
+ return v;
+ } catch (IOException err2) {
+ message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
+ }
+ }
+ }
+ }
}
}
- finally {
- input.close();
- }
-
- if(format < 0)
- return version;
- // We cannot be sure about the format of the file.
- // Therefore we have to read the whole file and cannot simply seek to the version entry.
-
- SegmentInfos sis = new SegmentInfos();
- sis.read(directory);
- return sis.getVersion();
- }
+ /**
+ * Subclass must implement this. The assumption is an
+ * IOException will be thrown if something goes wrong
+ * during the processing that could have been caused by
+ * a writer committing.
+ */
+ protected abstract Object doBody(String segmentFileName) throws IOException;
+ }
}
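Every entry point that touches the current segments file (open, read, readCurrentVersion) now funnels through FindSegmentsFile. A minimal sketch of a custom use, assuming a final Directory reference in scope; the body mirrors readCurrentVersion above:

    final Directory dir = FSDirectory.getDirectory("/tmp/lucene.demo", false);
    long version = ((Long) new SegmentInfos.FindSegmentsFile(dir) {
      public Object doBody(String segmentFileName) throws IOException {
        // May throw IOException if a commit finished between the
        // directory listing and this read; run() then retries with
        // the next candidate generation.
        SegmentInfos infos = new SegmentInfos();
        infos.read(dir, segmentFileName);
        return new Long(infos.getVersion());
      }
    }.run()).longValue();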
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java (revision 468583)
+++ src/java/org/apache/lucene/index/IndexWriter.java (working copy)
@@ -66,16 +66,7 @@
private long writeLockTimeout = WRITE_LOCK_TIMEOUT;
- /**
- * Default value for the commit lock timeout (10,000).
- * @see #setDefaultCommitLockTimeout
- */
- public static long COMMIT_LOCK_TIMEOUT = 10000;
-
- private long commitLockTimeout = COMMIT_LOCK_TIMEOUT;
-
public static final String WRITE_LOCK_NAME = "write.lock";
- public static final String COMMIT_LOCK_NAME = "commit.lock";
/**
* Default value is 10. Change using {@link #setMergeFactor(int)}.
@@ -110,6 +101,7 @@
private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory
private final Directory ramDirectory = new RAMDirectory(); // for temp segs
+ private IndexFileDeleter deleter;
private Lock writeLock;
@@ -258,17 +250,28 @@
throw new IOException("Index locked for write: " + writeLock);
this.writeLock = writeLock; // save it
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), commitLockTimeout) {
- public Object doBody() throws IOException {
- if (create)
- segmentInfos.write(directory);
- else
- segmentInfos.read(directory);
- return null;
- }
- }.run();
+ if (create) {
+ // Try to read first. This is to allow create
+ // against an index that's currently open for
+ // searching. In this case we write the next
+ // segments_N file with no segments:
+ try {
+ segmentInfos.read(directory);
+ segmentInfos.clear();
+ } catch (IOException e) {
+ // Likely this means it's a fresh directory
+ }
+ segmentInfos.write(directory);
+ } else {
+ segmentInfos.read(directory);
}
+
+ // Create a deleter to keep track of which files can
+ // be deleted:
+ deleter = new IndexFileDeleter(segmentInfos, directory);
+ deleter.setInfoStream(infoStream);
+ deleter.findDeletableFiles();
+ deleter.deleteFiles();
}
/** Determines the largest number of documents ever merged by addDocument().
@@ -373,35 +376,6 @@
}
/**
- * Sets the maximum time to wait for a commit lock (in milliseconds) for this instance of IndexWriter. @see
- * @see #setDefaultCommitLockTimeout to change the default value for all instances of IndexWriter.
- */
- public void setCommitLockTimeout(long commitLockTimeout) {
- this.commitLockTimeout = commitLockTimeout;
- }
-
- /**
- * @see #setCommitLockTimeout
- */
- public long getCommitLockTimeout() {
- return commitLockTimeout;
- }
-
- /**
- * Sets the default (for any instance of IndexWriter) maximum time to wait for a commit lock (in milliseconds)
- */
- public static void setDefaultCommitLockTimeout(long commitLockTimeout) {
- IndexWriter.COMMIT_LOCK_TIMEOUT = commitLockTimeout;
- }
-
- /**
- * @see #setDefaultCommitLockTimeout
- */
- public static long getDefaultCommitLockTimeout() {
- return IndexWriter.COMMIT_LOCK_TIMEOUT;
- }
-
- /**
* Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see
* @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter.
*/
@@ -509,7 +483,7 @@
String segmentName = newRAMSegmentName();
dw.addDocument(segmentName, doc);
synchronized (this) {
- ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory));
+ ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory, false));
maybeFlushRamSegments();
}
}
@@ -782,36 +756,26 @@
int docCount = merger.merge(); // merge 'em
segmentInfos.setSize(0); // pop old infos & add new
- segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
+ SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false);
+ segmentInfos.addElement(info);
if(sReader != null)
sReader.close();
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) {
- public Object doBody() throws IOException {
- segmentInfos.write(directory); // commit changes
- return null;
- }
- }.run();
- }
+ String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ segmentInfos.write(directory); // commit changes
- deleteSegments(segmentsToDelete); // delete now-unused segments
+ deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
+ deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
if (useCompoundFile) {
- final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp");
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) {
- public Object doBody() throws IOException {
- // make compound file visible for SegmentReaders
- directory.renameFile(mergedName + ".tmp", mergedName + ".cfs");
- return null;
- }
- }.run();
- }
+ Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
+ segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ info.setUseCompoundFile(true);
+ segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
- // delete now unused files of segment
- deleteFiles(filesToDelete);
+ deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
+ deleter.deleteFiles(filesToDelete); // delete now unused files of segment
}
}
@@ -929,10 +893,11 @@
*/
private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end)
throws IOException {
+
final String mergedName = newSegmentName();
if (infoStream != null) infoStream.print("merging segments");
SegmentMerger merger = new SegmentMerger(this, mergedName);
-
+
final Vector segmentsToDelete = new Vector();
for (int i = minSegment; i < end; i++) {
SegmentInfo si = sourceSegments.info(i);
@@ -952,7 +917,7 @@
}
SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount,
- directory);
+ directory, false);
if (sourceSegments == ramSegmentInfos) {
sourceSegments.removeAllElements();
segmentInfos.addElement(newSegment);
@@ -965,115 +930,26 @@
// close readers before we attempt to delete now-obsolete segments
merger.closeReaders();
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) {
- public Object doBody() throws IOException {
- segmentInfos.write(directory); // commit before deleting
- return null;
- }
- }.run();
- }
-
- deleteSegments(segmentsToDelete); // delete now-unused segments
+ String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ segmentInfos.write(directory); // commit before deleting
+ deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
+ deleter.deleteSegments(segmentsToDelete); // delete now-unused segments
+
if (useCompoundFile) {
- final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp");
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) {
- public Object doBody() throws IOException {
- // make compound file visible for SegmentReaders
- directory.renameFile(mergedName + ".tmp", mergedName + ".cfs");
- return null;
- }
- }.run();
- }
+ Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs");
- // delete now unused files of segment
- deleteFiles(filesToDelete);
+ segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ newSegment.setUseCompoundFile(true);
+ segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file
+
+ deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file
+ deleter.deleteFiles(filesToDelete); // delete now-unused files of segment
}
return mergedDocCount;
}
- /*
- * Some operating systems (e.g. Windows) don't permit a file to be deleted
- * while it is opened for read (e.g. by another process or thread). So we
- * assume that when a delete fails it is because the file is open in another
- * process, and queue the file for subsequent deletion.
- */
-
- private final void deleteSegments(Vector segments) throws IOException {
- Vector deletable = new Vector();
-
- deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
-
- for (int i = 0; i < segments.size(); i++) {
- SegmentReader reader = (SegmentReader)segments.elementAt(i);
- if (reader.directory() == this.directory)
- deleteFiles(reader.files(), deletable); // try to delete our files
- else
- deleteFiles(reader.files(), reader.directory()); // delete other files
- }
-
- writeDeleteableFiles(deletable); // note files we can't delete
- }
-
- private final void deleteFiles(Vector files) throws IOException {
- Vector deletable = new Vector();
- deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
- deleteFiles(files, deletable); // try to delete our files
- writeDeleteableFiles(deletable); // note files we can't delete
- }
-
- private final void deleteFiles(Vector files, Directory directory)
- throws IOException {
- for (int i = 0; i < files.size(); i++)
- directory.deleteFile((String)files.elementAt(i));
- }
-
- private final void deleteFiles(Vector files, Vector deletable)
- throws IOException {
- for (int i = 0; i < files.size(); i++) {
- String file = (String)files.elementAt(i);
- try {
- directory.deleteFile(file); // try to delete each file
- } catch (IOException e) { // if delete fails
- if (directory.fileExists(file)) {
- if (infoStream != null)
- infoStream.println(e.toString() + "; Will re-try later.");
- deletable.addElement(file); // add to deletable
- }
- }
- }
- }
-
- private final Vector readDeleteableFiles() throws IOException {
- Vector result = new Vector();
- if (!directory.fileExists(IndexFileNames.DELETABLE))
- return result;
-
- IndexInput input = directory.openInput(IndexFileNames.DELETABLE);
- try {
- for (int i = input.readInt(); i > 0; i--) // read file names
- result.addElement(input.readString());
- } finally {
- input.close();
- }
- return result;
- }
-
- private final void writeDeleteableFiles(Vector files) throws IOException {
- IndexOutput output = directory.createOutput("deleteable.new");
- try {
- output.writeInt(files.size());
- for (int i = 0; i < files.size(); i++)
- output.writeString((String)files.elementAt(i));
- } finally {
- output.close();
- }
- directory.renameFile("deleteable.new", IndexFileNames.DELETABLE);
- }
-
private final boolean checkNonDecreasingLevels(int start) {
int lowerBound = -1;
int upperBound = minMergeDocs;
Index: src/java/org/apache/lucene/index/IndexFileDeleter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 0)
+++ src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 0)
@@ -0,0 +1,182 @@
+package org.apache.lucene.index;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexFileNameFilter;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Vector;
+import java.util.HashMap;
+
+/**
+ * A utility class (used by both IndexReader and
+ * IndexWriter) to keep track of files that need to be
+ * deleted because they are no longer referenced by the
+ * index.
+ */
+public class IndexFileDeleter {
+ private Vector deletable;
+ private Vector pending;
+ private Directory directory;
+ private SegmentInfos segmentInfos;
+ private PrintStream infoStream;
+
+ public IndexFileDeleter(SegmentInfos segmentInfos, Directory directory)
+ throws IOException {
+ this.segmentInfos = segmentInfos;
+ this.directory = directory;
+ }
+
+ void setInfoStream(PrintStream infoStream) {
+ this.infoStream = infoStream;
+ }
+
+ /** Determine index files that are no longer referenced
+ * and therefore should be deleted. This is called once
+ * (by the writer); after that, files are added to
+ * deletable at the point they become unused (eg when
+ * segments are merged), and are removed from deletable
+ * only once they are successfully deleted.
+ */
+
+ public void findDeletableFiles() throws IOException {
+
+ // Gather all "current" segments:
+ HashMap current = new HashMap();
+ for(int j=0;j<segmentInfos.size();j++) {
+ SegmentInfo segmentInfo = (SegmentInfo) segmentInfos.elementAt(j);
+ current.put(segmentInfo.name, segmentInfo);
+ }
+
+ // Then go through all files in the Directory that are
+ // Lucene index files and mark as deletable any that are
+ // not referenced by the current segments infos:
+ String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName();
+ IndexFileNameFilter filter = new IndexFileNameFilter();
+ String[] files = directory.list();
+ for(int i = 0; i < files.length; i++) {
+ if (filter.accept(null, files[i]) &&
+ !files[i].equals(segmentsInfosFileName) &&
+ !files[i].equals(IndexFileNames.SEGMENTS_GEN)) {
+ // Strip off any file extension:
+ String segmentName = files[i];
+ int loc = segmentName.indexOf('.');
+ if (loc != -1) {
+ segmentName = segmentName.substring(0, loc);
+ }
+ // Strip off any generation count:
+ loc = segmentName.indexOf('_', 1);
+ if (loc != -1) {
+ segmentName = segmentName.substring(0, loc);
+ }
+ if (!current.containsKey(segmentName)) {
+ addDeletableFile(files[i]);
+ }
+ }
+ }
+ }
+
+ private void addDeletableFile(String fileName) {
+ if (deletable == null) {
+ deletable = new Vector();
+ }
+ deletable.addElement(fileName);
+ }
+
+ /*
+ * Some operating systems (e.g. Windows) don't permit a file to be
+ * deleted while it is opened for read (e.g. by another process or
+ * thread). So we assume that when a delete fails it is because the
+ * file is open in another process, and queue the file for later
+ * deletion.
+ */
+
+ /** Try to delete the given file; if the delete fails, the
+ * file is queued on the deletable list for a later retry. */
+ public void deleteFile(String file) throws IOException {
+ try {
+ directory.deleteFile(file); // try to delete each file
+ } catch (IOException e) { // if delete fails
+ if (directory.fileExists(file)) {
+ if (infoStream != null) {
+ infoStream.println(e.toString() + "; Will re-try later.");
+ }
+ addDeletableFile(file); // add to deletable
+ }
+ }
+ }
+
+ public void deleteFiles(Vector files) throws IOException {
+ for (int i = 0; i < files.size(); i++) {
+ deleteFile((String) files.elementAt(i));
+ }
+ }
+
+ /** Retry deletion of all files previously queued on the
+ * deletable list. */
+ public void deleteFiles() throws IOException {
+ if (deletable != null) {
+ Vector oldDeletable = deletable;
+ deletable = null;
+ deleteFiles(oldDeletable);
+ }
+ }
+
+ public void deleteSegments(Vector segments) throws IOException {
+ deleteFiles(); // try to delete files queued earlier
+ for (int i = 0; i < segments.size(); i++) {
+ SegmentReader reader = (SegmentReader) segments.elementAt(i);
+ if (reader.directory() == this.directory) {
+ deleteFiles(reader.files()); // try to delete our files
+ } else {
+ Vector files = reader.files(); // delete, in the other
+ for (int j = 0; j < files.size(); j++) { // directory, the files
+ reader.directory().deleteFile((String) files.elementAt(j)); // of this segment
+ }
+ }
+ }
+ }
+
+ /** Files written by a reader whose commit has not yet
+ * succeeded: commitPendingFiles() moves them to deletable,
+ * clearPendingFiles() drops them. */
+ public void addPendingFile(String fileName) {
+ if (pending == null) {
+ pending = new Vector();
+ }
+ pending.addElement(fileName);
+ }
+
+ public void commitPendingFiles() {
+ if (pending != null) {
+ if (deletable == null) {
+ deletable = new Vector();
+ }
+ for (int i = 0; i < pending.size(); i++) {
+ deletable.addElement(pending.elementAt(i));
+ }
+ pending = null;
+ }
+ }
+
+ public void clearPendingFiles() {
+ pending = null;
+ }
+}
Index: src/java/org/apache/lucene/index/SegmentInfo.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentInfo.java (revision 468583)
+++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy)
public String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
+
+ boolean preLockless; // true if this segment was written by
+ // pre-lockless code (so we must check the
+ // filesystem for .del and separate norms files)
+
+ long delGen; // generation of the .del file; -1 if there are no
+ // deletes; 0 if pre-lockless (check the filesystem);
+ // > 0 if there are deletes at generation N
+
+ long[] normGen; // generation of each field's separate norms file;
+ // null plus preLockless means check the filesystem;
+ // null otherwise means no separate norms
+
+ int isCompoundFile; // 1 if this segment uses a compound file; -1 if
+ // not; 0 if we must check the filesystem (ie,
+ // pre-lockless: compound iff <name>.cfs exists)
+
public SegmentInfo(String name, int docCount, Directory dir) {
this.name = name;
this.docCount = docCount;
this.dir = dir;
+ delGen = -1;
+ isCompoundFile = 0;
+ preLockless = true;
}
+ public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile) {
+ this(name, docCount, dir);
+ if (isCompoundFile) {
+ this.isCompoundFile = 1;
+ } else {
+ this.isCompoundFile = -1;
+ }
+ preLockless = false;
+ }
+
+
+ /**
+ * Construct a new SegmentInfo instance by reading a
+ * previously saved SegmentInfo from input.
+ *
+ * @param dir directory to load from
+ * @param format format of the segments info file
+ * @param input input handle to read segment info from
+ */
+ public SegmentInfo(Directory dir, int format, IndexInput input) throws IOException {
+ this.dir = dir;
+ name = input.readString();
+ docCount = input.readInt();
+ if (format <= SegmentInfos.FORMAT_LOCKLESS) {
+ delGen = input.readLong();
+ int numNormGen = input.readInt();
+ if (numNormGen == -1) {
+ normGen = null;
+ } else {
+ normGen = new long[numNormGen];
+ for(int j=0;j<numNormGen;j++) {
+ normGen[j] = input.readLong();
+ }
+ }
+ isCompoundFile = input.readByte();
+ preLockless = isCompoundFile == 0;
+ } else {
+ // This is a pre-lockless segment; we must check the
+ // filesystem for deletes, separate norms and .cfs:
+ delGen = 0;
+ normGen = null;
+ isCompoundFile = 0;
+ preLockless = true;
+ }
+ }
+
+ boolean hasDeletions() throws IOException {
+ // Cases:
+ //
+ // delGen == -1: this means this segment was written
+ // by the LOCKLESS code and for certain has no
+ // deletions yet
+ //
+ // delGen == 0: this means this segment was written by
+ // pre-LOCKLESS code which means we must check the
+ // directory to see if a .del file exists
+ //
+ // delGen > 0: this means this segment was written by
+ // the LOCKLESS code and for certain has
+ // deletions
+ //
+ if (delGen == -1) {
+ return false;
+ } else if (delGen > 0) {
+ return true;
+ } else {
+ return dir.fileExists(getDelFileName());
+ }
+ }
+
+ void advanceDelGen() {
+ // delGen 0 is reserved for pre-LOCKLESS format
+ if (delGen == -1) {
+ delGen = 1;
+ } else {
+ delGen++;
+ }
+ }
+
+ void clearDelGen() {
+ delGen = -1;
+ }
+
+ String getDelFileName() {
+ if (delGen == -1) {
+ // In this case we know there is no deletion filename
+ // against this segment
+ return null;
+ } else {
+ // If delGen is 0, it's the pre-lockless-commit file format
+ return IndexFileNames.fileNameFromGeneration(name, ".del", delGen);
+ }
+ }
+
+ /**
+ * Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX).
+ *
+ * @param fieldNumber the field index to check
+ */
+ boolean hasSeparateNorms(int fieldNumber)
+ throws IOException {
+ if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) {
+ // Must fallback to directory file exists check:
+ String fileName = name + ".s" + fieldNumber;
+ return dir.fileExists(fileName);
+ } else if (normGen == null || normGen[fieldNumber] == -1) {
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ /**
+ * Returns true if any fields in this segment have separate norms.
+ */
+ boolean hasSeparateNorms()
+ throws IOException {
+ if (normGen == null) {
+ if (!preLockless) {
+ // This means we were created w/ LOCKLESS code and no
+ // norms are written yet:
+ return false;
+ } else {
+ // This means this segment was saved with pre-LOCKLESS
+ // code. So we must fallback to the original
+ // directory list check:
+ String[] result = dir.list();
+ String pattern;
+ pattern = name + ".s";
+ int patternLength = pattern.length();
+ for(int i = 0; i < result.length; i++){
+ if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength)))
+ return true;
+ }
+ return false;
+ }
+ } else {
+ // This means this segment was saved with LOCKLESS
+ // code so we first check whether any normGen's are >
+ // 0 (meaning they definitely have separate norms):
+ for(int i=0;i<normGen.length;i++) {
+ if (normGen[i] > 0) {
+ return true;
+ }
+ }
+ // Next we look for any == 0. These cases were
+ // pre-LOCKLESS and must be checked in directory:
+ for(int i=0;i<normGen.length;i++) {
+ if (normGen[i] == 0) {
+ // Fallback to filesystem check:
+ if (hasSeparateNorms(i)) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
Index: docs/fileformats.html
===================================================================
--- docs/fileformats.html (revision 468583)
+++ docs/fileformats.html (working copy)
This document defines the index file formats used
- in Lucene version 2.0. If you are using a different
+ in Lucene version XXX. If you are using a different
version of Lucene, please consult the copy of
docs/fileformats.html that was distributed
with the version you are using.
@@ -141,6 +141,15 @@
Compatibility notes are provided in this document,
describing how file formats have changed from prior versions.
+
+ In version XXX, the file format was changed to allow
+ lock-less commits. The change is fully backwards
+ compatible: you can open a pre-XXX index for searching
+ or adding/deleting of docs. When the new segments
+ file is saved (committed), it will be written in the
+ new file format (meaning no specific "upgrade" process
+ is needed).
+
@@ -402,6 +411,15 @@
in an index are stored in a single directory, although this is not
required.
+
+ As of version XXX (lock-less commits), file names are
+ never re-used. That is, when any file is saved to the
+ Directory it is given a never before used filename.
+ This is achieved using a simple generations approach.
+ For example, the first segments file is segments_1,
+ then segments_2, etc. The generation is a sequential
+ long integer represented in alpha-numeric (base 36) form.
+
@@ -1078,27 +1096,53 @@
The active segments in the index are stored in the
- segment info file. An index only has
- a single file in this format, and it is named "segments".
- This lists each segment by name, and also contains the size of each
- segment.
+ segment info file, segments_N. There may
+ be one or more segments_N files in the
+ index; however, the one with the largest
+ generation is the active one (when older
+ segments_N files are present it's because they
+ temporarily cannot be deleted, or a writer is in
+ the process of committing). This file lists each
+ segment by name, has details about the separate
+ norms and deletion files, and also contains the
+ size of each segment.
+ As of XXX, there is also a file
+ segments.gen. This file contains the
+ current generation (the _N in
+ segments_N) of the index. This is
+ recorded only as a fallback in case the current
+ generation cannot be accurately determined by
+ directory listing alone (as is the case for some
+ NFS clients with time-based directory cache
+ expiration). This file simply contains the
+ generation recorded as an Int64, written twice.
+
+
+ Pre-XXX:
Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>^SegCount
- Format, NameCounter, SegCount, SegSize --> UInt32
+ XXX and above:
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen^NumField, IsCompoundFile>^SegCount
- Version --> UInt64
+ Format, NameCounter, SegCount, SegSize, NumField --> UInt32
+ Version, DelGen, NormGen --> UInt64
+
+
SegName --> String
- Format is -1 in Lucene 1.4.
+ IsCompoundFile --> Int8
+ Format is -1 as of Lucene 1.4 and -2 as of XXX.
+
+
Version counts how often the index has been
changed by adding or deleting documents.
@@ -1134,15 +1178,20 @@
-
- When a file named "commit.lock"
- is present, a process is currently re-writing the "segments"
- file and deleting outdated segment index files, or a process is
- reading the "segments"
- file and opening the files of the segments it names. This lock file
- prevents files from being deleted by another process after a process
- has read the "segments"
- file but before it has managed to open all of the files of the
- segments named therein.
+ When a file named "commit.lock" is
+ present, a process is currently re-writing
+ the "segments" file and deleting outdated
+ segment index files, or a process is
+ reading the "segments" file and opening
+ the files of the segments it names. This
+ lock file prevents files from being
+ deleted by another process after a process
+ has read the "segments" file but before it
+ has managed to open all of the files of
+ the segments named therein. As of XXX,
+ the "commit.lock" is no longer used
+ because readers are able to open an index
+ even while a writer is committing.
@@ -1168,11 +1217,15 @@
- A file named "deletable"
- contains the names of files that are no longer used by the index, but
- which could not be deleted. This is only used on Win32, where a
- file may not be deleted while it is still open. On other platforms
- the file contains only null bytes.
+ A file named "deletable" contains the names of
+ files that are no longer used by the index, but
+ which could not be deleted. This is only used on
+ Win32, where a file may not be deleted while it is
+ still open. On other platforms the file contains
+ only null bytes. As of version XXX, there is no
+ deletable file. Instead, the set of deletable
+ files is computed when a writer is created, and
+ then maintained as an in-memory vector.
Deletable --> DeletableCount, <DeletableName>^DeletableCount
Index: xdocs/fileformats.xml
===================================================================
--- xdocs/fileformats.xml (revision 468583)
+++ xdocs/fileformats.xml (working copy)
@@ -14,7 +14,7 @@
This document defines the index file formats used
- in Lucene version 2.0. If you are using a different
+ in Lucene version XXX. If you are using a different
version of Lucene, please consult the copy of
docs/fileformats.html that was distributed
with the version you are using.
@@ -43,6 +43,16 @@
describing how file formats have changed from prior versions.
+
+ In version XXX, the file format was changed to allow
+ lock-less commits. The change is fully backwards
+ compatible: you can open a pre-XXX index for searching
+ or adding/deleting of docs. When the new segments
+ file is saved (committed), it will be written in the
+ new file format (meaning no specific "upgrade" process
+ is needed).
+
+
@@ -260,6 +270,16 @@
required.
+
+ As of version XXX (lock-less commits), file names are
+ never re-used. That is, when any file is saved to the
+ Directory it is given a never before used filename.
+ This is achieved using a simple generations approach.
+ For example, the first segments file is segments_1,
+ then segments_2, etc. The generation is a sequential
+ long integer represented in alpha-numeric (base 36) form.
+
+
@@ -696,22 +716,46 @@
The active segments in the index are stored in the
- segment info file. An index only has
- a single file in this format, and it is named "segments".
- This lists each segment by name, and also contains the size of each
- segment.
+ segment info file, segments_N. There may
+ be one or more segments_N files in the
+ index; however, the one with the largest
+ generation is the active one (when older
+ segments_N files are present it's because they
+ temporarily cannot be deleted, or, a writer is in
+ the process of committing). This file lists each
+ segment by name, has details about the separate
+ norms and deletion files, and also contains the
+ size of each segment.
+
+ As of XXX, there is also a file
+ segments.gen. This file contains the
+ current generation (the _N in
+ segments_N) of the index. This is
+ recorded only as a fallback in case the current
+ generation cannot be accurately determined by
+ directory listing alone (as is the case for some
+ NFS clients with time-based directory cache
+ expiration). This file simply contains the
+ generation recorded as an Int64, written twice.
+
+
+ Pre-XXX:
Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>^SegCount
+
+ XXX and above:
+ Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen^NumField, IsCompoundFile>^SegCount
+
- Format, NameCounter, SegCount, SegSize --> UInt32
+ Format, NameCounter, SegCount, SegSize, NumField --> UInt32
- Version --> UInt64
+ Version, DelGen, NormGen --> UInt64
@@ -719,10 +763,14 @@
- Format is -1 in Lucene 1.4.
+ IsCompoundFile --> Int8
+ Format is -1 as of Lucene 1.4 and -2 as of XXX.
+
+
+
Version counts how often the index has been
changed by adding or deleting documents.
@@ -756,15 +804,20 @@
-
- When a file named "commit.lock"
- is present, a process is currently re-writing the "segments"
- file and deleting outdated segment index files, or a process is
- reading the "segments"
- file and opening the files of the segments it names. This lock file
- prevents files from being deleted by another process after a process
- has read the "segments"
- file but before it has managed to open all of the files of the
- segments named therein.
+ When a file named "commit.lock" is
+ present, a process is currently re-writing
+ the "segments" file and deleting outdated
+ segment index files, or a process is
+ reading the "segments" file and opening
+ the files of the segments it names. This
+ lock file prevents files from being
+ deleted by another process after a process
+ has read the "segments" file but before it
+ has managed to open all of the files of
+ the segments named therein. As of XXX,
+ the "commit.lock" is no longer used
+ because readers are able to open an index
+ even while a writer is committing.
@@ -782,11 +835,15 @@
- A file named "deletable"
- contains the names of files that are no longer used by the index, but
- which could not be deleted. This is only used on Win32, where a
- file may not be deleted while it is still open. On other platforms
- the file contains only null bytes.
+ A file named "deletable" contains the names of
+ files that are no longer used by the index, but
+ which could not be deleted. This is only used on
+ Win32, where a file may not be deleted while it is
+ still open. On other platforms the file contains
+ only null bytes. As of version XXX, there is no
+ deletable file. Instead, the set of deletable
+ files is computed when a writer is created, and
+ then maintained as an in-memory vector.
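As a companion to the segments.gen description above, a hedged sketch of the consistency check a reader performs on that file (this mirrors Method 2 of SegmentInfos.FindSegmentsFile; directory is assumed to be an open org.apache.lucene.store.Directory):

    // segments.gen holds the generation as an Int64 written twice; if
    // the two values differ the file was caught mid-write and must be
    // ignored in favor of the directory listing.
    IndexInput genInput = directory.openInput("segments.gen");
    try {
      long gen0 = genInput.readLong();
      long gen1 = genInput.readLong();
      if (gen0 == gen1) {
        // consistent: gen0 names the current segments_N file
      }
    } finally {
      genInput.close();
    }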