Index: src/test/org/apache/lucene/store/TestLockFactory.java =================================================================== --- src/test/org/apache/lucene/store/TestLockFactory.java (revision 470932) +++ src/test/org/apache/lucene/store/TestLockFactory.java (working copy) @@ -57,9 +57,9 @@ // Both write lock and commit lock should have been created: assertEquals("# of unique locks created (after instantiating IndexWriter)", - 2, lf.locksCreated.size()); - assertTrue("# calls to makeLock <= 2 (after instantiating IndexWriter)", - lf.makeLockCount > 2); + 1, lf.locksCreated.size()); + assertTrue("# calls to makeLock is 0 (after instantiating IndexWriter)", + lf.makeLockCount >= 1); for(Enumeration e = lf.locksCreated.keys(); e.hasMoreElements();) { String lockName = (String) e.nextElement(); @@ -89,6 +89,7 @@ try { writer2 = new IndexWriter(dir, new WhitespaceAnalyzer(), false); } catch (Exception e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException with no locking"); } @@ -233,6 +234,7 @@ try { writer2 = new IndexWriter(indexDirName, new WhitespaceAnalyzer(), false); } catch (IOException e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException with locking disabled"); } @@ -265,6 +267,7 @@ try { fs2 = FSDirectory.getDirectory(indexDirName, true, lf); } catch (IOException e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException because LockFactory instances are the same"); } @@ -293,7 +296,6 @@ public void _testStressLocks(LockFactory lockFactory, String indexDirName) throws IOException { FSDirectory fs1 = FSDirectory.getDirectory(indexDirName, true, lockFactory); - // fs1.setLockFactory(NoLockFactory.getNoLockFactory()); // First create a 1 doc index: IndexWriter w = new IndexWriter(fs1, new WhitespaceAnalyzer(), true); @@ -404,6 +406,7 @@ hitException = true; System.out.println("Stress Test Index Writer: creation hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + 
break; } if (writer != null) { try { @@ -412,6 +415,7 @@ hitException = true; System.out.println("Stress Test Index Writer: addDoc hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } try { writer.close(); @@ -419,6 +423,7 @@ hitException = true; System.out.println("Stress Test Index Writer: close hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } writer = null; } @@ -445,6 +450,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: create hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } if (searcher != null) { Hits hits = null; @@ -454,6 +460,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: search hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } // System.out.println(hits.length() + " total results"); try { @@ -462,6 +469,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: close hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } searcher = null; } Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 470932) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -1,6 +1,7 @@ package org.apache.lucene.index; import java.io.IOException; +import java.io.File; import junit.framework.TestCase; @@ -10,7 +11,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; /** @@ -28,14 +32,11 @@ int i; IndexWriter.setDefaultWriteLockTimeout(2000); - IndexWriter.setDefaultCommitLockTimeout(2000); assertEquals(2000, 
IndexWriter.getDefaultWriteLockTimeout()); - assertEquals(2000, IndexWriter.getDefaultCommitLockTimeout()); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); IndexWriter.setDefaultWriteLockTimeout(1000); - IndexWriter.setDefaultCommitLockTimeout(1000); // add 100 documents for (i = 0; i < 100; i++) { @@ -72,6 +73,12 @@ assertEquals(60, reader.maxDoc()); assertEquals(60, reader.numDocs()); reader.close(); + + // make sure opening a new index for create over + // this existing one works correctly: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + assertEquals(0, writer.docCount()); + writer.close(); } private void addDoc(IndexWriter writer) throws IOException @@ -80,4 +87,192 @@ doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); writer.addDocument(doc); } + + // Make sure we can open an index for create even when a + // reader holds it open (this fails pre lock-less + // commits on windows): + public void testCreateWithReader() throws IOException { + String tempDir = System.getProperty("java.io.tmpdir"); + if (tempDir == null) + throw new IOException("java.io.tmpdir undefined, cannot run test"); + File indexDir = new File(tempDir, "lucenetestindexwriter"); + Directory dir = FSDirectory.getDirectory(indexDir, true); + + // add one document & close writer + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDoc(writer); + writer.close(); + + // now open reader: + IndexReader reader = IndexReader.open(dir); + assertEquals("should be one document", reader.numDocs(), 1); + + // now open index for create: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + assertEquals("should be zero documents", writer.docCount(), 0); + addDoc(writer); + writer.close(); + + assertEquals("should be one document", reader.numDocs(), 1); + IndexReader reader2 = IndexReader.open(dir); + assertEquals("should be one document", reader2.numDocs(), 1); + reader.close(); + reader2.close(); + 
rmDir(indexDir); + } + + // Simulate a writer that crashed while writing segments + // file: make sure we can still open the index (ie, + // gracefully fallback to the previous segments file), + // and that we can add to the index: + public void testSimulatedCrashedWriter() throws IOException { + Directory dir = new RAMDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 1 but got " + gen, gen > 1); + + // Make the next segments file, with last byte + // missing, to simulate a writer that crashed while + // writing segments file: + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int i=0;i 1 but got " + gen, gen > 1); + + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int i=0;i 1 but got " + gen, gen > 1); + + String[] files = dir.list(); + for(int i=0;i 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + + public void copyFile(Directory dir, String src, String dest) throws IOException { + IndexInput in = dir.openInput(src); + IndexOutput out = dir.createOutput(dest); + byte[] b = new byte[1024]; + long remainder = in.length(); + while(remainder > 0) { + int len = (int) Math.min(b.length, remainder); + in.readBytes(b, 0, len); + out.writeBytes(b, len); + remainder -= len; + } + } + + private void 
addDoc(IndexWriter writer, int id) throws IOException + { + Document doc = new Document(); + doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); + doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED)); + writer.addDocument(doc); + } +} Index: src/test/org/apache/lucene/index/TestMultiReader.java =================================================================== --- src/test/org/apache/lucene/index/TestMultiReader.java (revision 470932) +++ src/test/org/apache/lucene/index/TestMultiReader.java (working copy) @@ -79,6 +79,21 @@ assertEquals( 1, reader.numDocs() ); reader.undeleteAll(); assertEquals( 2, reader.numDocs() ); + + // Ensure undeleteAll survives commit/close/reopen: + reader.commit(); + reader.close(); + sis.read(dir); + reader = new MultiReader(dir, sis, false, readers); + assertEquals( 2, reader.numDocs() ); + + reader.deleteDocument(0); + assertEquals( 1, reader.numDocs() ); + reader.commit(); + reader.close(); + sis.read(dir); + reader = new MultiReader(dir, sis, false, readers); + assertEquals( 1, reader.numDocs() ); } Index: src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 470932) +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -31,6 +31,7 @@ import java.util.Collection; import java.io.IOException; +import java.io.FileNotFoundException; import java.io.File; public class TestIndexReader extends TestCase @@ -221,6 +222,11 @@ assertEquals("deleted count", 100, deleted); assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm)); assertTermDocsCount("deleted termDocs", reader, searchTerm, 0); + + // open a 2nd reader to make sure first reader can + // commit its changes (.del) while second reader + // is open: + IndexReader reader2 = IndexReader.open(dir); reader.close(); // CREATE A NEW READER and re-test @@ -230,10 
+236,73 @@ reader.close(); } + // Make sure you can set norms & commit even if a reader + // is open against the index: + public void testWritingNorms() throws IOException + { + String tempDir = System.getProperty("tempDir"); + if (tempDir == null) + throw new IOException("tempDir undefined, cannot run test"); + File indexDir = new File(tempDir, "lucenetestnormwriter"); + Directory dir = FSDirectory.getDirectory(indexDir, true); + IndexWriter writer = null; + IndexReader reader = null; + Term searchTerm = new Term("content", "aaa"); + + // add 1 documents with term : aaa + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDoc(writer, searchTerm.text()); + writer.close(); + + // now open reader & set norm for doc 0 + reader = IndexReader.open(dir); + reader.setNorm(0, "content", (float) 2.0); + + // we should be holding the write lock now: + assertTrue("locked", IndexReader.isLocked(dir)); + + reader.commit(); + + // we should not be holding the write lock now: + assertTrue("not locked", !IndexReader.isLocked(dir)); + + // open a 2nd reader: + IndexReader reader2 = IndexReader.open(dir); + + // set norm again for doc 0 + reader.setNorm(0, "content", (float) 3.0); + assertTrue("locked", IndexReader.isLocked(dir)); + + reader.close(); + + // we should not be holding the write lock now: + assertTrue("not locked", !IndexReader.isLocked(dir)); + + reader2.close(); + dir.close(); + + rmDir(indexDir); + } + + public void testDeleteReaderWriterConflictUnoptimized() throws IOException{ deleteReaderWriterConflict(false); } + + public void testOpenEmptyDirectory() throws IOException{ + String dirName = "test.empty"; + File fileDirName = new File(dirName); + if (!fileDirName.exists()) { + fileDirName.mkdir(); + } + try { + IndexReader reader = IndexReader.open(fileDirName); + fail("opening IndexReader on empty directory failed to produce FileNotFoundException"); + } catch (FileNotFoundException e) { + // GOOD + } + } public void 
testDeleteReaderWriterConflictOptimized() throws IOException{ deleteReaderWriterConflict(true); @@ -367,15 +436,39 @@ assertFalse(IndexReader.isLocked(dir)); // reader only, no lock long version = IndexReader.lastModified(dir); reader.close(); - // modify index and check version has been incremented: + // modify index and check version has been + // incremented: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); addDocumentWithFields(writer); writer.close(); reader = IndexReader.open(dir); - assertTrue(version < IndexReader.getCurrentVersion(dir)); + assertTrue("old lastModified is " + version + "; new lastModified is " + IndexReader.lastModified(dir), version <= IndexReader.lastModified(dir)); reader.close(); } + public void testVersion() throws IOException { + assertFalse(IndexReader.indexExists("there_is_no_such_index")); + Directory dir = new RAMDirectory(); + assertFalse(IndexReader.indexExists(dir)); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDocumentWithFields(writer); + assertTrue(IndexReader.isLocked(dir)); // writer open, so dir is locked + writer.close(); + assertTrue(IndexReader.indexExists(dir)); + IndexReader reader = IndexReader.open(dir); + assertFalse(IndexReader.isLocked(dir)); // reader only, no lock + long version = IndexReader.getCurrentVersion(dir); + reader.close(); + // modify index and check version has been + // incremented: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDocumentWithFields(writer); + writer.close(); + reader = IndexReader.open(dir); + assertTrue("old version is " + version + "; new version is " + IndexReader.getCurrentVersion(dir), version < IndexReader.getCurrentVersion(dir)); + reader.close(); + } + public void testLock() throws IOException { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); @@ -411,6 +504,40 @@ reader.close(); } + public void testUndeleteAllAfterClose() throws 
IOException { + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDocumentWithFields(writer); + addDocumentWithFields(writer); + writer.close(); + IndexReader reader = IndexReader.open(dir); + reader.deleteDocument(0); + reader.deleteDocument(1); + reader.close(); + reader = IndexReader.open(dir); + reader.undeleteAll(); + assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll() + reader.close(); + } + + public void testUndeleteAllAfterCloseThenReopen() throws IOException { + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDocumentWithFields(writer); + addDocumentWithFields(writer); + writer.close(); + IndexReader reader = IndexReader.open(dir); + reader.deleteDocument(0); + reader.deleteDocument(1); + reader.close(); + reader = IndexReader.open(dir); + reader.undeleteAll(); + reader.close(); + reader = IndexReader.open(dir); + assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll() + reader.close(); + } + public void testDeleteReaderReaderConflictUnoptimized() throws IOException{ deleteReaderReaderConflict(false); } @@ -561,4 +688,11 @@ doc.add(new Field("content", value, Field.Store.NO, Field.Index.TOKENIZED)); writer.addDocument(doc); } + private void rmDir(File dir) { + File[] files = dir.listFiles(); + for (int i = 0; i < files.length; i++) { + files[i].delete(); + } + dir.delete(); + } } Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 470932) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -217,6 +217,13 @@ return new MultiTermPositions(subReaders, starts); } + protected void setDeleter(IndexFileDeleter deleter) { + // Share deleter to our SegmentReaders: + this.deleter = deleter; + for 
(int i = 0; i < subReaders.length; i++) + subReaders[i].setDeleter(deleter); + } + protected void doCommit() throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(); Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 470932) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -112,6 +112,7 @@ private Directory directory; private boolean directoryOwner; private boolean closeDirectory; + protected IndexFileDeleter deleter; private SegmentInfos segmentInfos; private Lock writeLock; @@ -137,24 +138,40 @@ } private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException { - synchronized (directory) { // in- & inter-process sync - return (IndexReader)new Lock.With( - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), - IndexWriter.COMMIT_LOCK_TIMEOUT) { - public Object doBody() throws IOException { - SegmentInfos infos = new SegmentInfos(); - infos.read(directory); - if (infos.size() == 1) { // index is optimized - return SegmentReader.get(infos, infos.info(0), closeDirectory); + + return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) { + + public Object doBody(String segmentFileName) throws IOException { + + SegmentInfos infos = new SegmentInfos(); + infos.read(directory, segmentFileName); + + if (infos.size() == 1) { // index is optimized + return SegmentReader.get(infos, infos.info(0), closeDirectory); + } else { + + // To reduce the chance of hitting FileNotFound + // (and having to retry), we open segments in + // reverse because IndexWriter merges & deletes + // the newest segments first. 
+ + IndexReader[] readers = new IndexReader[infos.size()]; + for (int i = infos.size()-1; i >= 0; i--) { + try { + readers[i] = SegmentReader.get(infos.info(i)); + } catch (IOException e) { + // Close all readers we had opened: + for(i++;itrue if an index exists; false otherwise */ public static boolean indexExists(String directory) { - return (new File(directory, IndexFileNames.SEGMENTS)).exists(); + return indexExists(new File(directory)); } /** @@ -327,8 +324,9 @@ * @param directory the directory to check for an index * @return true if an index exists; false otherwise */ + public static boolean indexExists(File directory) { - return (new File(directory, IndexFileNames.SEGMENTS)).exists(); + return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1; } /** @@ -339,7 +337,7 @@ * @throws IOException if there is a problem with accessing the index */ public static boolean indexExists(Directory directory) throws IOException { - return directory.fileExists(IndexFileNames.SEGMENTS); + return SegmentInfos.getCurrentSegmentGeneration(directory) != -1; } /** Returns the number of documents in this index. 
*/ @@ -591,17 +589,22 @@ */ protected final synchronized void commit() throws IOException{ if(hasChanges){ + if (deleter == null) { + // In the MultiReader case, we share this deleter + // across all SegmentReaders: + setDeleter(new IndexFileDeleter(segmentInfos, directory)); + deleter.deleteFiles(); + } if(directoryOwner){ - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), - IndexWriter.COMMIT_LOCK_TIMEOUT) { - public Object doBody() throws IOException { - doCommit(); - segmentInfos.write(directory); - return null; - } - }.run(); - } + deleter.clearPendingFiles(); + doCommit(); + String oldInfoFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); + // Attempt to delete all files we just obsoleted: + + deleter.deleteFile(oldInfoFileName); + deleter.commitPendingFiles(); + deleter.deleteFiles(); if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -613,6 +616,13 @@ hasChanges = false; } + protected void setDeleter(IndexFileDeleter deleter) { + this.deleter = deleter; + } + protected IndexFileDeleter getDeleter() { + return deleter; + } + /** Implements commit. 
*/ protected abstract void doCommit() throws IOException; @@ -657,8 +667,7 @@ */ public static boolean isLocked(Directory directory) throws IOException { return - directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() || - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked(); + directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked(); } /** @@ -683,7 +692,6 @@ */ public static void unlock(Directory directory) throws IOException { directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release(); } /** Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 470932) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -26,19 +26,25 @@ /** Name of the index segment file */ static final String SEGMENTS = "segments"; + + /** Name of the generation reference file name */ + static final String SEGMENTS_GEN = "segments.gen"; - /** Name of the index deletable file */ + /** Name of the index deletable file (only used in + * pre-lockless indices) */ static final String DELETABLE = "deletable"; - + /** - * This array contains all filename extensions used by Lucene's index files, with - * one exception, namely the extension made up from .f + a number. - * Also note that two of Lucene's files (deletable and - * segments) don't have any filename extension. + * This array contains all filename extensions used by + * Lucene's index files, with two exceptions, namely the + * extension made up from .f + a number and + * from .s + a number. Also note that + * Lucene's segments_N files do not have any + * filename extension. 
*/ static final String INDEX_EXTENSIONS[] = new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", - "tvx", "tvd", "tvf", "tvp" }; + "tvx", "tvd", "tvf", "tvp", "gen"}; /** File extensions of old-style index files */ static final String COMPOUND_EXTENSIONS[] = new String[] { @@ -49,5 +55,24 @@ static final String VECTOR_EXTENSIONS[] = new String[] { "tvx", "tvd", "tvf" }; - + + /** + * Computes the full file name from base, extension and + * generation. If the generation is -1, the file name is + * null. If it's 0, the file name is . + * If it's > 0, the file name is _. + * + * @param base -- main part of the file name + * @param extension -- extension of the filename (including .) + * @param gen -- generation + */ + public static final String fileNameFromGeneration(String base, String extension, long gen) { + if (gen == -1) { + return null; + } else if (gen == 0) { + return base + extension; + } else { + return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension; + } + } } Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 470932) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -18,36 +18,151 @@ import java.util.Vector; import java.io.IOException; +import java.io.PrintStream; +import java.io.File; +import java.io.FileNotFoundException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Constants; -final class SegmentInfos extends Vector { +public final class SegmentInfos extends Vector { /** The file format version, a negative number. */ /* Works since counter, the old 1st entry, is always >= 0 */ public static final int FORMAT = -1; - + + /** This is the current file format written. 
It differs + * slightly from the previous format in that file names + * are never re-used (write once). Instead, each file is + * written to the next generation. For example, + * segments_1, segments_2, etc. This allows us to not use + * a commit lock. See file + * formats for details. + */ + public static final int FORMAT_LOCKLESS = -2; + public int counter = 0; // used to name new segments /** * counts how often the index has been changed by adding or deleting docs. * starting with the current time in milliseconds forces to create unique version numbers. */ private long version = System.currentTimeMillis(); + private long generation = 0; // generation of the "segments_N" file we read + /** + * If non-null, information about loading segments_N files + * will be printed here. @see #setInfoStream. + */ + private static PrintStream infoStream; + public final SegmentInfo info(int i) { return (SegmentInfo) elementAt(i); } - public final void read(Directory directory) throws IOException { - - IndexInput input = directory.openInput(IndexFileNames.SEGMENTS); + /** + * Get the generation (N) of the current segments_N file + * from a list of files. + * + * @param files -- array of file names to check + */ + public static long getCurrentSegmentGeneration(String[] files) { + if (files == null) { + return -1; + } + long max = -1; + int prefixLen = IndexFileNames.SEGMENTS.length()+1; + for (int i = 0; i < files.length; i++) { + String file = files[i]; + if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) { + if (file.equals(IndexFileNames.SEGMENTS)) { + // Pre lock-less commits: + if (max == -1) { + max = 0; + } + } else { + long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX); + if (v > max) { + max = v; + } + } + } + } + return max; + } + + /** + * Get the generation (N) of the current segments_N file + * in the directory. 
+ * + * @param directory -- directory to search for the latest segments_N file + */ + public static long getCurrentSegmentGeneration(Directory directory) throws IOException { + String[] files = directory.list(); + if (files == null) + throw new IOException("Cannot read directory " + directory); + return getCurrentSegmentGeneration(files); + } + + /** + * Get the filename of the current segments_N file + * from a list of files. + * + * @param files -- array of file names to check + */ + + public static String getCurrentSegmentFileName(String[] files) throws IOException { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + getCurrentSegmentGeneration(files)); + } + + /** + * Get the filename of the current segments_N file + * in the directory. + * + * @param directory -- directory to search for the latest segments_N file + */ + public static String getCurrentSegmentFileName(Directory directory) throws IOException { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + getCurrentSegmentGeneration(directory)); + } + + /** + * Get the segment_N filename in use by this segment infos. + */ + public String getCurrentSegmentFileName() { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + generation); + } + + /** + * Read a particular segmentFileName. Note that this may + * throw an IOException if a commit is in process. 
+ * + * @param directory -- directory containing the segments file + * @param segmentFileName -- segment file to load + */ + public final void read(Directory directory, String segmentFileName) throws IOException { + boolean success = false; + + IndexInput input = directory.openInput(segmentFileName); + + if (segmentFileName.equals(IndexFileNames.SEGMENTS)) { + generation = 0; + } else { + generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), + Character.MAX_RADIX); + } + try { int format = input.readInt(); if(format < 0){ // file contains explicit format info // check that it is a format we can understand - if (format < FORMAT) + if (format < FORMAT_LOCKLESS) throw new IOException("Unknown format version: " + format); version = input.readLong(); // read version counter = input.readInt(); // read counter @@ -57,9 +172,7 @@ } for (int i = input.readInt(); i > 0; i--) { // read segmentInfos - SegmentInfo si = - new SegmentInfo(input.readString(), input.readInt(), directory); - addElement(si); + addElement(new SegmentInfo(directory, format, input)); } if(format >= 0){ // in old format the version number may be at the end of the file @@ -68,31 +181,71 @@ else version = input.readLong(); // read version } + success = true; } finally { input.close(); + if (!success) { + // Clear any segment infos we had loaded so we + // have a clean slate on retry: + clear(); + } } } + /** + * This version of read uses the retry logic (for lock-less + * commits) to find the right segments file to load. 
+ */ + public final void read(Directory directory) throws IOException { + generation = -1; + + new FindSegmentsFile(directory) { + + public Object doBody(String segmentFileName) throws IOException { + read(directory, segmentFileName); + return null; + } + }.run(); + } + public final void write(Directory directory) throws IOException { - IndexOutput output = directory.createOutput("segments.new"); + + // Always advance the generation on write: + if (generation == -1) { + generation = 1; + } else { + generation++; + } + + String segmentFileName = getCurrentSegmentFileName(); + IndexOutput output = directory.createOutput(segmentFileName); + try { - output.writeInt(FORMAT); // write FORMAT - output.writeLong(++version); // every write changes the index + output.writeInt(FORMAT_LOCKLESS); // write FORMAT + output.writeLong(++version); // every write changes + // the index output.writeInt(counter); // write counter output.writeInt(size()); // write infos for (int i = 0; i < size(); i++) { SegmentInfo si = info(i); - output.writeString(si.name); - output.writeInt(si.docCount); + si.write(output); } } finally { output.close(); } - // install new segment info - directory.renameFile("segments.new", IndexFileNames.SEGMENTS); + try { + output = directory.createOutput(IndexFileNames.SEGMENTS_GEN); + output.writeInt(FORMAT_LOCKLESS); + output.writeLong(generation); + output.writeLong(generation); + output.close(); + } catch (IOException e) { + // It's OK if we fail to write this file since it's + // used only as one of the retry fallbacks. 
+ } } /** @@ -107,30 +260,322 @@ */ public static long readCurrentVersion(Directory directory) throws IOException { + + return ((Long) new FindSegmentsFile(directory) { + public Object doBody(String segmentFileName) throws IOException { + + IndexInput input = directory.openInput(segmentFileName); + + int format = 0; + long version = 0; + try { + format = input.readInt(); + if(format < 0){ + if (format < FORMAT_LOCKLESS) + throw new IOException("Unknown format version: " + format); + version = input.readLong(); // read version + } + } + finally { + input.close(); + } + + if(format < 0) + return new Long(version); + + // We cannot be sure about the format of the file. + // Therefore we have to read the whole file and cannot simply seek to the version entry. + SegmentInfos sis = new SegmentInfos(); + sis.read(directory, segmentFileName); + return new Long(sis.getVersion()); + } + }.run()).longValue(); + } + + /** If non-null, information about retries when loading + * the segments file will be printed to this. + */ + public static void setInfoStream(PrintStream infoStream) { + SegmentInfos.infoStream = infoStream; + } + + /* Advanced configuration of retry logic in loading + segments_N file */ + private static int defaultGenFileRetryCount = 10; + private static int defaultGenFileRetryPauseMsec = 50; + private static int defaultGenLookaheadCount = 10; + + /** + * Advanced: set how many times to try loading the + * segments.gen file contents to determine current segment + * generation. This file is only referenced when the + * primary method (listing the directory) fails. + */ + public static void setDefaultGenFileRetryCount(int count) { + defaultGenFileRetryCount = count; + } + + /** + * @see #setDefaultGenFileRetryCount + */ + public static int getDefaultGenFileRetryCount() { + return defaultGenFileRetryCount; + } + + /** + * Advanced: set how many milliseconds to pause in between + * attempts to load the segments.gen file. 
+ */ + public static void setDefaultGenFileRetryPauseMsec(int msec) { + defaultGenFileRetryPauseMsec = msec; + } + + /** + * @see #setDefaultGenFileRetryPauseMsec + */ + public static int getDefaultGenFileRetryPauseMsec() { + return defaultGenFileRetryPauseMsec; + } + + /** + * Advanced: set how many times to try incrementing the + * gen when loading the segments file. This only runs if + * the primary (listing directory) and secondary (opening + * segments.gen file) methods fail to find the segments + * file. + */ + public static void setDefaultGenLookaheadCount(int count) { + defaultGenLookaheadCount = count; + } + /** + * @see #setDefaultGenLookaheadCount + */ + public static int getDefaultGenLookahedCount() { + return defaultGenLookaheadCount; + } + + /** + * @see #setInfoStream + */ + public static PrintStream getInfoStream() { + return infoStream; + } + + private static void message(String message) { + if (infoStream != null) { + infoStream.println(Thread.currentThread().getName() + ": " + message); + } + } + + /** + * Utility class for executing code that needs to do + * something with the current segments file. This is + * necessary with lock-less commits because from the time + * you locate the current segments file name, until you + * actually open it, read its contents, or check modified + * time, etc., it could have been deleted due to a writer + * commit finishing. + */ + public abstract static class FindSegmentsFile { + + File fileDirectory; + Directory directory; + + public FindSegmentsFile(File directory) { + this.fileDirectory = directory; + } + + public FindSegmentsFile(Directory directory) { + this.directory = directory; + } + + public Object run() throws IOException { + String segmentFileName = null; + long lastGen = -1; + long gen = 0; + int genLookaheadCount = 0; + IOException exc = null; + boolean retry = false; + + int method = 0; + + // Loop until we succeed in calling doBody() without + // hitting an IOException. 
An IOException most likely + // means a commit was in process and has finished, in + // the time it took us to load the now-old infos files + // (and segments files). It's also possible it's a + // true error (corrupt index). To distinguish these, + // on each retry we must see "forward progress" on + // which generation we are trying to load. If we + // don't, then the original error is real and we throw + // it. - IndexInput input = directory.openInput(IndexFileNames.SEGMENTS); - int format = 0; - long version = 0; - try { - format = input.readInt(); - if(format < 0){ - if (format < FORMAT) - throw new IOException("Unknown format version: " + format); - version = input.readLong(); // read version + // We have three methods for determining the current + // generation. We try each in sequence. + + while(true) { + + // Method 1: list the directory and use the highest + // segments_N file. This method works well as long + // as there is no stale caching on the directory + // contents: + String[] files = null; + + if (0 == method) { + if (directory != null) { + files = directory.list(); + } else { + files = fileDirectory.list(); + } + + gen = getCurrentSegmentGeneration(files); + + if (gen == -1) { + String s = ""; + for(int i=0;i gen) { + message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen); + gen = gen0; + } + break; + } + } + } catch (IOException err2) { + // will retry + } finally { + genInput.close(); + } + } + try { + Thread.sleep(defaultGenFileRetryPauseMsec); + } catch (InterruptedException e) { + // will retry + } + } + } + + // Method 3 (fallback if Methods 2 & 3 are not + // reliabel): since both directory cache and file + // contents cache seem to be stale, just advance the + // generation. 
+ if (2 == method || (1 == method && lastGen == gen && retry)) { + + method = 2; + + if (genLookaheadCount < defaultGenLookaheadCount) { + gen++; + genLookaheadCount++; + message("look ahead incremenent gen to " + gen); + } + } + + if (lastGen == gen) { + + // This means we're about to try the same + // segments_N last tried. This is allowed, + // exactly once, because writer could have been in + // the process of writing segments_N last time. + + if (retry) { + // OK, we've tried the same segments_N file + // twice in a row, so this must be a real + // error. We throw the original exception we + // got. + throw exc; + } else { + retry = true; + } + + } else { + // Segment file has advanced since our last loop, so + // reset retry: + retry = false; + } + + lastGen = gen; + + segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + + try { + Object v = doBody(segmentFileName); + if (exc != null) { + message("success on " + segmentFileName); + } + return v; + } catch (IOException err) { + + // Save the original root cause: + if (exc == null) { + exc = err; + } + + message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen); + + if (!retry && gen > 1) { + + // This is our first time trying this segments + // file (because retry is false), and, there is + // possibly a segments_(N-1) (because gen > 1). 
+ // So, check if the segments_(N-1) exists and + // try it if so: + String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen-1); + + if (directory.fileExists(prevSegmentFileName)) { + message("fallback to prior segment file '" + prevSegmentFileName + "'"); + try { + Object v = doBody(prevSegmentFileName); + if (exc != null) { + message("success on fallback " + prevSegmentFileName); + } + return v; + } catch (IOException err2) { + message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); + } + } + } + } } } - finally { - input.close(); - } - - if(format < 0) - return version; - // We cannot be sure about the format of the file. - // Therefore we have to read the whole file and cannot simply seek to the version entry. - - SegmentInfos sis = new SegmentInfos(); - sis.read(directory); - return sis.getVersion(); - } + /** + * Subclass must implement this. The assumption is an + * IOException will be thrown if something goes wrong + * during the processing that could have been caused by + * a writer committing. + */ + protected abstract Object doBody(String segmentFileName) throws IOException;} } Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 470932) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -66,16 +66,7 @@ private long writeLockTimeout = WRITE_LOCK_TIMEOUT; - /** - * Default value for the commit lock timeout (10,000). - * @see #setDefaultCommitLockTimeout - */ - public static long COMMIT_LOCK_TIMEOUT = 10000; - - private long commitLockTimeout = COMMIT_LOCK_TIMEOUT; - public static final String WRITE_LOCK_NAME = "write.lock"; - public static final String COMMIT_LOCK_NAME = "commit.lock"; /** * Default value is 10. Change using {@link #setMergeFactor(int)}. 
@@ -110,6 +101,7 @@ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private final Directory ramDirectory = new RAMDirectory(); // for temp segs + private IndexFileDeleter deleter; private Lock writeLock; @@ -258,17 +250,28 @@ throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // save it - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - if (create) - segmentInfos.write(directory); - else - segmentInfos.read(directory); - return null; - } - }.run(); + if (create) { + // Try to read first. This is to allow create + // against an index that's currently open for + // searching. In this case we write the next + // segments_N file with no segments: + try { + segmentInfos.read(directory); + segmentInfos.clear(); + } catch (IOException e) { + // Likely this means it's a fresh directory + } + segmentInfos.write(directory); + } else { + segmentInfos.read(directory); } + + // Create a deleter to keep track of which files can + // be deleted: + deleter = new IndexFileDeleter(segmentInfos, directory); + deleter.setInfoStream(infoStream); + deleter.findDeletableFiles(); + deleter.deleteFiles(); } /** Determines the largest number of documents ever merged by addDocument(). @@ -373,35 +376,6 @@ } /** - * Sets the maximum time to wait for a commit lock (in milliseconds) for this instance of IndexWriter. @see - * @see #setDefaultCommitLockTimeout to change the default value for all instances of IndexWriter. 
- */ - public void setCommitLockTimeout(long commitLockTimeout) { - this.commitLockTimeout = commitLockTimeout; - } - - /** - * @see #setCommitLockTimeout - */ - public long getCommitLockTimeout() { - return commitLockTimeout; - } - - /** - * Sets the default (for any instance of IndexWriter) maximum time to wait for a commit lock (in milliseconds) - */ - public static void setDefaultCommitLockTimeout(long commitLockTimeout) { - IndexWriter.COMMIT_LOCK_TIMEOUT = commitLockTimeout; - } - - /** - * @see #setDefaultCommitLockTimeout - */ - public static long getDefaultCommitLockTimeout() { - return IndexWriter.COMMIT_LOCK_TIMEOUT; - } - - /** * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter. */ @@ -509,7 +483,7 @@ String segmentName = newRAMSegmentName(); dw.addDocument(segmentName, doc); synchronized (this) { - ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory)); + ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory, false)); maybeFlushRamSegments(); } } @@ -782,36 +756,26 @@ int docCount = merger.merge(); // merge 'em segmentInfos.setSize(0); // pop old infos & add new - segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory)); + SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false); + segmentInfos.addElement(info); if(sReader != null) sReader.close(); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - segmentInfos.write(directory); // commit changes - return null; - } - }.run(); - } + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); // commit changes - deleteSegments(segmentsToDelete); // delete now-unused segments + 
deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { - final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - // make compound file visible for SegmentReaders - directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); - return null; - } - }.run(); - } + Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + info.setUseCompoundFile(true); + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file - // delete now unused files of segment - deleteFiles(filesToDelete); + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteFiles(filesToDelete); // delete now unused files of segment } } @@ -929,10 +893,11 @@ */ private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end) throws IOException { + final String mergedName = newSegmentName(); if (infoStream != null) infoStream.print("merging segments"); SegmentMerger merger = new SegmentMerger(this, mergedName); - + final Vector segmentsToDelete = new Vector(); for (int i = minSegment; i < end; i++) { SegmentInfo si = sourceSegments.info(i); @@ -952,7 +917,7 @@ } SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory); + directory, false); if (sourceSegments == ramSegmentInfos) { sourceSegments.removeAllElements(); segmentInfos.addElement(newSegment); @@ -965,115 +930,26 @@ // close readers before we attempt to delete now-obsolete segments merger.closeReaders(); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), 
commitLockTimeout) { - public Object doBody() throws IOException { - segmentInfos.write(directory); // commit before deleting - return null; - } - }.run(); - } - - deleteSegments(segmentsToDelete); // delete now-unused segments + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); // commit before deleting + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + if (useCompoundFile) { - final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - // make compound file visible for SegmentReaders - directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); - return null; - } - }.run(); - } + Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); - // delete now unused files of segment - deleteFiles(filesToDelete); + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + newSegment.setUseCompoundFile(true); + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteFiles(filesToDelete); // delete now-unused segments } return mergedDocCount; } - /* - * Some operating systems (e.g. Windows) don't permit a file to be deleted - * while it is opened for read (e.g. by another process or thread). So we - * assume that when a delete fails it is because the file is open in another - * process, and queue the file for subsequent deletion. 
- */ - - private final void deleteSegments(Vector segments) throws IOException { - Vector deletable = new Vector(); - - deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable - - for (int i = 0; i < segments.size(); i++) { - SegmentReader reader = (SegmentReader)segments.elementAt(i); - if (reader.directory() == this.directory) - deleteFiles(reader.files(), deletable); // try to delete our files - else - deleteFiles(reader.files(), reader.directory()); // delete other files - } - - writeDeleteableFiles(deletable); // note files we can't delete - } - - private final void deleteFiles(Vector files) throws IOException { - Vector deletable = new Vector(); - deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable - deleteFiles(files, deletable); // try to delete our files - writeDeleteableFiles(deletable); // note files we can't delete - } - - private final void deleteFiles(Vector files, Directory directory) - throws IOException { - for (int i = 0; i < files.size(); i++) - directory.deleteFile((String)files.elementAt(i)); - } - - private final void deleteFiles(Vector files, Vector deletable) - throws IOException { - for (int i = 0; i < files.size(); i++) { - String file = (String)files.elementAt(i); - try { - directory.deleteFile(file); // try to delete each file - } catch (IOException e) { // if delete fails - if (directory.fileExists(file)) { - if (infoStream != null) - infoStream.println(e.toString() + "; Will re-try later."); - deletable.addElement(file); // add to deletable - } - } - } - } - - private final Vector readDeleteableFiles() throws IOException { - Vector result = new Vector(); - if (!directory.fileExists(IndexFileNames.DELETABLE)) - return result; - - IndexInput input = directory.openInput(IndexFileNames.DELETABLE); - try { - for (int i = input.readInt(); i > 0; i--) // read file names - result.addElement(input.readString()); - } finally { - input.close(); - } - return result; - } - - private final void 
writeDeleteableFiles(Vector files) throws IOException { - IndexOutput output = directory.createOutput("deleteable.new"); - try { - output.writeInt(files.size()); - for (int i = 0; i < files.size(); i++) - output.writeString((String)files.elementAt(i)); - } finally { - output.close(); - } - directory.renameFile("deleteable.new", IndexFileNames.DELETABLE); - } - private final boolean checkNonDecreasingLevels(int start) { int lowerBound = -1; int upperBound = minMergeDocs; Index: src/java/org/apache/lucene/index/SegmentInfo.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfo.java (revision 470932) +++ src/java/org/apache/lucene/index/SegmentInfo.java (working copy) @@ -17,15 +17,302 @@ */ import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.IndexInput; +import java.io.IOException; final class SegmentInfo { public String name; // unique name in dir public int docCount; // number of docs in seg public Directory dir; // where segment resides + private boolean preLockless; // true if this is a segments file written before + // lock-less commits (XXX) + + private long delGen; // current generation of del file; -1 if there + // are no deletes; 0 if it's a pre-XXX segment + // (and we must check filesystem); 1 or higher if + // there are deletes at generation N + + private long[] normGen; // current generations of each field's norm file. + // If this array is null, we must check filesystem + // when preLockLess is true. 
Else, + // there are no separate norms + + private byte isCompoundFile; // -1 if it is not; 1 if it is; 0 if it's + // pre-XXX (ie, must check file system to see + // if .cfs exists) + public SegmentInfo(String name, int docCount, Directory dir) { this.name = name; this.docCount = docCount; this.dir = dir; + delGen = -1; + isCompoundFile = 0; + preLockless = true; } + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile) { + this(name, docCount, dir); + if (isCompoundFile) { + this.isCompoundFile = 1; + } else { + this.isCompoundFile = -1; + } + preLockless = false; + } + + + /** + * Construct a new SegmentInfo instance by reading a + * previously saved SegmentInfo from input. + * + * @param dir directory to load from + * @param format format of the segments info file + * @param input input handle to read segment info from + */ + public SegmentInfo(Directory dir, int format, IndexInput input) throws IOException { + this.dir = dir; + name = input.readString(); + docCount = input.readInt(); + if (format <= SegmentInfos.FORMAT_LOCKLESS) { + delGen = input.readLong(); + int numNormGen = input.readInt(); + if (numNormGen == -1) { + normGen = null; + } else { + normGen = new long[numNormGen]; + for(int j=0;j 0: this means this segment was written by + // the LOCKLESS code and for certain has + // deletions + // + if (delGen == -1) { + return false; + } else if (delGen > 0) { + return true; + } else { + return dir.fileExists(getDelFileName()); + } + } + + void advanceDelGen() { + // delGen 0 is reserved for pre-LOCKLESS format + if (delGen == -1) { + delGen = 1; + } else { + delGen++; + } + } + + void clearDelGen() { + delGen = -1; + } + + String getDelFileName() { + if (delGen == -1) { + // In this case we know there is no deletion filename + // against this segment + return null; + } else { + // If delGen is 0, it's the pre-lockless-commit file format + return IndexFileNames.fileNameFromGeneration(name, ".del", delGen); + } + } + + /** + * 
Returns true if this field for this segment has saved a separate norms file (__N.sX). + * + * @param fieldNumber the field index to check + */ + boolean hasSeparateNorms(int fieldNumber) + throws IOException { + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) { + // Must fallback to directory file exists check: + String fileName = name + ".s" + fieldNumber; + return dir.fileExists(fileName); + } else if (normGen == null || normGen[fieldNumber] == -1) { + return false; + } else { + return true; + } + } + + /** + * Returns true if any fields in this segment have separate norms. + */ + boolean hasSeparateNorms() + throws IOException { + if (normGen == null) { + if (!preLockless) { + // This means we were created w/ LOCKLESS code and no + // norms are written yet: + return false; + } else { + // This means this segment was saved with pre-LOCKLESS + // code. So we must fallback to the original + // directory list check: + String[] result = dir.list(); + String pattern; + pattern = name + ".s"; + int patternLength = pattern.length(); + for(int i = 0; i < result.length; i++){ + if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength))) + return true; + } + return false; + } + } else { + // This means this segment was saved with LOCKLESS + // code so we first check whether any normGen's are > + // 0 (meaning they definitely have separate norms): + for(int i=0;i 0) { + return true; + } + } + // Next we look for any == 0. These cases were + // pre-LOCKLESS and must be checked in directory: + for(int i=0;i

This document defines the index file formats used - in Lucene version 2.0. If you are using a different + in Lucene version XXX. If you are using a different version of Lucene, please consult the copy of docs/fileformats.html that was distributed with the version you are using. @@ -141,6 +141,15 @@ Compatibility notes are provided in this document, describing how file formats have changed from prior versions.

+

+ In version XXX, the file format was changed to allow + lock-less commits. The change is fully backwards + compatible: you can open a pre-XXX index for searching + or adding/deleting of docs. When the new segments + file is saved (committed), it will be written in the + new file format (meaning no specific "upgrade" process + is needed). +

@@ -402,6 +411,15 @@ in an index are stored in a single directory, although this is not required.

+

+ As of version XXX (lock-less commits), file names are + never re-used. That is, when any file is saved to the + Directory it is given a never before used filename. + This is achieved using a simple generations approach. + For example, the first segments file is segments_1, + then segments_2, etc. The generation is a sequential + long integer represented in alpha-numeric (base 36) form. +

@@ -1078,27 +1096,55 @@

The active segments in the index are stored in the - segment info file. An index only has - a single file in this format, and it is named "segments". - This lists each segment by name, and also contains the size of each - segment. + segment info file, segments_N. There may + be one or more segments_N files in the + index; however, the one with the largest + generation is the active one (when older + segments_N files are present it's because they + temporarily cannot be deleted, or, a writer is in + the process of committing). This file lists each + segment by name, has details about the separate + norms and deletion files, and also contains the + size of each segment.

+ As of XXX, there is also a file + segments.gen. This file contains the + current generation (the _N in + segments_N) of the index. This is + recorded only as a fallback in case the current + generation cannot be accurately determined by + directory listing alone (as is the case for some + NFS clients with time-based directory cache + expiration). This file simply contains an Int32 + version header (SegmentInfos.FORMAT_LOCKLESS = + -2), followed by the generation recorded as Int64, + written twice. +

+

+ Pre-XXX: Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>SegCount

- Format, NameCounter, SegCount, SegSize --> UInt32 + XXX and above: + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGenNumField >SegCount, IsCompoundFile

- Version --> UInt64 + Format, NameCounter, SegCount, SegSize, NumField --> UInt32

+ Version, DelGen, NormGen --> UInt64 +

+

SegName --> String

- Format is -1 in Lucene 1.4. + IsCompoundFile --> Int8

+ Format is -1 as of Lucene 1.4 and -2 as of XXX. +

+

Version counts how often the index has been changed by adding or deleting documents.

@@ -1134,15 +1180,20 @@
  • - When a file named "commit.lock" - is present, a process is currently re-writing the "segments" - file and deleting outdated segment index files, or a process is - reading the "segments" - file and opening the files of the segments it names. This lock file - prevents files from being deleted by another process after a process - has read the "segments" - file but before it has managed to open all of the files of the - segments named therein. + When a file named "commit.lock" is + present, a process is currently re-writing + the "segments" file and deleting outdated + segment index files, or a process is + reading the "segments" file and opening + the files of the segments it names. This + lock file prevents files from being + deleted by another process after a process + has read the "segments" file but before it + has managed to open all of the files of + the segments named therein. As of XXX, + the "commit.lock" is no longer used + because readers are able to open an index + even while a writer is committing.

  • @@ -1168,11 +1219,15 @@

    - A file named "deletable" - contains the names of files that are no longer used by the index, but - which could not be deleted. This is only used on Win32, where a - file may not be deleted while it is still open. On other platforms - the file contains only null bytes. + A file named "deletable" contains the names of + files that are no longer used by the index, but + which could not be deleted. This is only used on + Win32, where a file may not be deleted while it is + still open. On other platforms the file contains + only null bytes. As of version XXX, there is no + deletable file. Instead, the deletable files are + computed on creating a writer and then maintained + as an in-memory vector.

    Deletable --> DeletableCount, Index: xdocs/fileformats.xml =================================================================== --- xdocs/fileformats.xml (revision 470932) +++ xdocs/fileformats.xml (working copy) @@ -14,7 +14,7 @@

    This document defines the index file formats used - in Lucene version 2.0. If you are using a different + in Lucene version XXX. If you are using a different version of Lucene, please consult the copy of docs/fileformats.html that was distributed with the version you are using. @@ -43,6 +43,16 @@ describing how file formats have changed from prior versions.

    +

    + In version XXX, the file format was changed to allow + lock-less commits. The change is fully backwards + compatible: you can open a pre-XXX index for searching + or adding/deleting of docs. When the new segments + file is saved (committed), it will be written in the + new file format (meaning no specific "upgrade" process + is needed). +

    +
    @@ -260,6 +270,16 @@ required.

    +

    + As of version XXX (lock-less commits), file names are + never re-used. That is, when any file is saved to the + Directory it is given a never before used filename. + This is achieved using a simple generations approach. + For example, the first segments file is segments_1, + then segments_2, etc. The generation is a sequential + long integer represented in alpha-numeric (base 36) form. +

    +
    @@ -696,22 +716,48 @@

    The active segments in the index are stored in the - segment info file. An index only has - a single file in this format, and it is named "segments". - This lists each segment by name, and also contains the size of each - segment. + segment info file, segments_N. There may + be one or more segments_N files in the + index; however, the one with the largest + generation is the active one (when older + segments_N files are present it's because they + temporarily cannot be deleted, or, a writer is in + the process of committing). This file lists each + segment by name, has details about the separate + norms and deletion files, and also contains the + size of each segment.

    +

    + As of XXX, there is also a file + segments.gen. This file contains the + current generation (the _N in + segments_N) of the index. This is + recorded only as a fallback in case the current + generation cannot be accurately determined by + directory listing alone (as is the case for some + NFS clients with time-based directory cache + expiration). This file simply contains an Int32 + version header (SegmentInfos.FORMAT_LOCKLESS = + -2), followed by the generation recorded as Int64, + written twice. +

    +

    + Pre-XXX: Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>SegCount

    +

    + XXX and above: + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGenNumField >SegCount, IsCompoundFile +

    - Format, NameCounter, SegCount, SegSize --> UInt32 + Format, NameCounter, SegCount, SegSize, NumField --> UInt32

    - Version --> UInt64 + Version, DelGen, NormGen --> UInt64

    @@ -719,10 +765,14 @@

    - Format is -1 in Lucene 1.4. + IsCompoundFile --> Int8

    + Format is -1 as of Lucene 1.4 and -2 as of XXX. +

    + +

    Version counts how often the index has been changed by adding or deleting documents.

    @@ -756,15 +806,20 @@
    • - When a file named "commit.lock" - is present, a process is currently re-writing the "segments" - file and deleting outdated segment index files, or a process is - reading the "segments" - file and opening the files of the segments it names. This lock file - prevents files from being deleted by another process after a process - has read the "segments" - file but before it has managed to open all of the files of the - segments named therein. + When a file named "commit.lock" is + present, a process is currently re-writing + the "segments" file and deleting outdated + segment index files, or a process is + reading the "segments" file and opening + the files of the segments it names. This + lock file prevents files from being + deleted by another process after a process + has read the "segments" file but before it + has managed to open all of the files of + the segments named therein. As of XXX, + the "commit.lock" is no longer used + because readers are able to open an index + even while a writer is committing.

    • @@ -782,11 +837,15 @@

      - A file named "deletable" - contains the names of + files that are no longer used by the index, but - which could not be deleted. This is only used on Win32, where a - file may not be deleted while it is still open. On other platforms - the file contains only null bytes. + A file named "deletable" contains the names of + files that are no longer used by the index, but + which could not be deleted. This is only used on + Win32, where a file may not be deleted while it is + still open. On other platforms the file contains + only null bytes. As of version XXX, there is no + deletable file. Instead, the deletable files are + computed on creating a writer and then maintained + as an in-memory vector.