Index: src/test/org/apache/lucene/store/TestLockFactory.java =================================================================== --- src/test/org/apache/lucene/store/TestLockFactory.java (revision 468583) +++ src/test/org/apache/lucene/store/TestLockFactory.java (working copy) @@ -57,9 +57,9 @@ // Both write lock and commit lock should have been created: assertEquals("# of unique locks created (after instantiating IndexWriter)", - 2, lf.locksCreated.size()); - assertTrue("# calls to makeLock <= 2 (after instantiating IndexWriter)", - lf.makeLockCount > 2); + 1, lf.locksCreated.size()); + assertTrue("# calls to makeLock is 0 (after instantiating IndexWriter)", + lf.makeLockCount >= 1); for(Enumeration e = lf.locksCreated.keys(); e.hasMoreElements();) { String lockName = (String) e.nextElement(); @@ -89,6 +89,7 @@ try { writer2 = new IndexWriter(dir, new WhitespaceAnalyzer(), false); } catch (Exception e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException with no locking"); } @@ -233,6 +234,7 @@ try { writer2 = new IndexWriter(indexDirName, new WhitespaceAnalyzer(), false); } catch (IOException e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException with locking disabled"); } @@ -265,6 +267,7 @@ try { fs2 = FSDirectory.getDirectory(indexDirName, true, lf); } catch (IOException e) { + e.printStackTrace(System.out); fail("Should not have hit an IOException because LockFactory instances are the same"); } @@ -293,7 +296,6 @@ public void _testStressLocks(LockFactory lockFactory, String indexDirName) throws IOException { FSDirectory fs1 = FSDirectory.getDirectory(indexDirName, true, lockFactory); - // fs1.setLockFactory(NoLockFactory.getNoLockFactory()); // First create a 1 doc index: IndexWriter w = new IndexWriter(fs1, new WhitespaceAnalyzer(), true); @@ -404,6 +406,7 @@ hitException = true; System.out.println("Stress Test Index Writer: creation hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } if (writer != null) { try { @@ -412,6 +415,7 @@ hitException = true; System.out.println("Stress Test Index Writer: addDoc hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } try { writer.close(); @@ -419,6 +423,7 @@ hitException = true; System.out.println("Stress Test Index Writer: close hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } writer = null; } @@ -445,6 +450,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: create hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } if (searcher != null) { Hits hits = null; @@ -454,6 +460,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: search hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } // System.out.println(hits.length() + " total results"); try { @@ -462,6 +469,7 @@ hitException = true; System.out.println("Stress Test Index Searcher: close hit unexpected exception: " + e.toString()); e.printStackTrace(System.out); + break; } searcher = null; } Index: src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexWriter.java (revision 468583) +++ src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -1,6 +1,7 @@ package org.apache.lucene.index; import java.io.IOException; +import java.io.File; import junit.framework.TestCase; @@ -10,7 +11,10 @@ import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; /** @@ -28,14 +32,11 @@ int i; IndexWriter.setDefaultWriteLockTimeout(2000); - IndexWriter.setDefaultCommitLockTimeout(2000); assertEquals(2000, IndexWriter.getDefaultWriteLockTimeout()); - assertEquals(2000, IndexWriter.getDefaultCommitLockTimeout()); writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); IndexWriter.setDefaultWriteLockTimeout(1000); - IndexWriter.setDefaultCommitLockTimeout(1000); // add 100 documents for (i = 0; i < 100; i++) { @@ -72,6 +73,12 @@ assertEquals(60, reader.maxDoc()); assertEquals(60, reader.numDocs()); reader.close(); + + // make sure opening a new index for create over + // this existing one works correctly: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + assertEquals(0, writer.docCount()); + writer.close(); } private void addDoc(IndexWriter writer) throws IOException @@ -80,4 +87,192 @@ doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); writer.addDocument(doc); } + + // Make sure we can open an index for create even when a + // reader holds it open (this fails pre lock-less + // commits on windows): + public void testCreateWithReader() throws IOException { + String tempDir = System.getProperty("java.io.tmpdir"); + if (tempDir == null) + throw new IOException("java.io.tmpdir undefined, cannot run test"); + File indexDir = new File(tempDir, "lucenetestindexwriter"); + Directory dir = FSDirectory.getDirectory(indexDir, true); + + // add one document & close writer + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + addDoc(writer); + writer.close(); + + // now open reader: + IndexReader reader = IndexReader.open(dir); + assertEquals("should be one document", reader.numDocs(), 1); + + // now open index for create: + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + assertEquals("should be zero documents", writer.docCount(), 0); + addDoc(writer); + writer.close(); + + assertEquals("should be one document", reader.numDocs(), 1); + IndexReader reader2 = IndexReader.open(dir); + assertEquals("should be one document", reader2.numDocs(), 1); + reader.close(); + reader2.close(); + rmDir(indexDir); + } + + // Simulate a writer that crashed while writing segments + // file: make sure we can still open the index (ie, + // gracefully fallback to the previous segments file), + // and that we can add to the index: + public void testSimulatedCrashedWriter() throws IOException { + Directory dir = new RAMDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 1 but got " + gen, gen > 1); + + // Make the next segments file, with last byte + // missing, to simulate a writer that crashed while + // writing segments file: + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int 
i=0;i<length-1;i++) { + out.writeByte(in.readByte()); + } + in.close(); + out.close(); + + IndexReader reader = null; + try { + reader = IndexReader.open(dir); + } catch (Exception e) { + fail("reader failed to open on a crashed index"); + } + reader.close(); + + try { + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + } catch (Exception e) { + fail("writer failed to open on a crashed index"); + } + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + } + + // Simulate a corrupt index by removing last byte of + // latest segments file and make sure we get an + // IOException trying to open the index: + public void testSimulatedCorruptIndex1() throws IOException { + Directory dir = new RAMDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 1 but got " + gen, gen > 1); + + String fileNameIn = SegmentInfos.getCurrentSegmentFileName(dir); + String fileNameOut = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + 1+gen); + IndexInput in = dir.openInput(fileNameIn); + IndexOutput out = dir.createOutput(fileNameOut); + long length = in.length(); + for(int i=0;i<length-1;i++) { + out.writeByte(in.readByte()); + } + in.close(); + out.close(); + dir.deleteFile(fileNameIn); + + IndexReader reader = null; + try { + reader = IndexReader.open(dir); + fail("reader did not hit IOException on opening a corrupt index"); + } catch (Exception e) { + } + + if (reader != null) { + reader.close(); + } + } + + // Simulate a corrupt index by removing one of the cfs + // files and make sure we get an IOException trying to + // open the index: + public void testSimulatedCorruptIndex2() throws IOException { + Directory dir = new RAMDirectory(); + + IndexWriter writer = null; + + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDoc(writer); + } + + // close + writer.close(); + + long gen = SegmentInfos.getCurrentSegmentGeneration(dir); + assertTrue("segment generation should be > 1 but got " + gen, gen > 1); + + String[] files = dir.list(); + for(int i=0;i<files.length;i++) { + if (files[i].endsWith(".cfs")) { + dir.deleteFile(files[i]); + break; + } + } + + IndexReader reader = null; + try { + reader = IndexReader.open(dir); + fail("reader did not hit IOException on opening a corrupt index"); + } catch (Exception e) { + } + + if (reader != null) { + reader.close(); + } + } + + private void rmDir(File dir) { + File[] files = dir.listFiles(); + if (files != null) { + for (int i = 0; i < files.length; i++) { + files[i].delete(); + } + } + dir.delete(); + } +} Index: src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 0) +++ src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 0) +package org.apache.lucene.index; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Enumeration; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; + +/* + Verify we can read the pre-lockless file format, do + searches against it, and add documents to it. +*/ +public class TestBackwardsCompatibility extends TestCase +{ + /* Unzips dirName + ".zip" --> dirName, removing dirName + first */ + public void unzip(String dirName) throws IOException { + rmDir(dirName); + + Enumeration entries; + ZipFile zipFile; + zipFile = new ZipFile(dirName + ".zip"); + + entries = zipFile.entries(); + File fileDir = new File(dirName); + fileDir.mkdir(); + + while (entries.hasMoreElements()) { + ZipEntry entry = (ZipEntry) entries.nextElement(); + + InputStream in = zipFile.getInputStream(entry); + OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(fileDir, entry.getName()))); + + byte[] buffer = new byte[8192]; + int len; + while((len = in.read(buffer)) >= 0) { + out.write(buffer, 0, len); + } + + in.close(); + out.close(); + } + + zipFile.close(); + } + + public void testCreateCFS() throws IOException { + createIndex("testindex.cfs", true); + } + + public void testCreateNoCFS() throws IOException { + createIndex("testindex.nocfs", false); + } + + public void testSearchOldIndexCFS() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs"; + unzip(dirName); + searchIndex(dirName); + rmDir(dirName); + } + + public void testIndexOldIndexCFSNoAdds() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs"; + unzip(dirName); + changeIndexNoAdds(dirName); + rmDir(dirName); + } + + public void testIndexOldIndexCFS() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.cfs"; + unzip(dirName); + changeIndexWithAdds(dirName); + rmDir(dirName); + } + + public void testSearchOldIndexNoCFS() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs"; + unzip(dirName); + searchIndex(dirName); + rmDir(dirName); + } + + public void testIndexOldIndexNoCFS() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs"; + unzip(dirName); + changeIndexWithAdds(dirName); + rmDir(dirName); + } + + public void testIndexOldIndexNoCFSNoAdds() throws IOException { + String dirName = "src/test/org/apache/lucene/index/index.prelockless.nocfs"; + unzip(dirName); + changeIndexNoAdds(dirName); + rmDir(dirName); + } + + public void searchIndex(String dirName) throws IOException { + //QueryParser parser = new QueryParser("contents", new WhitespaceAnalyzer()); + //Query query = parser.parse("handle:1"); + + Directory dir = FSDirectory.getDirectory(dirName, false); + IndexSearcher searcher = new IndexSearcher(dir); + + Hits hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals(34, hits.length()); + Document d = hits.doc(0); + + // First document should be #21 since its norm was increased: + assertEquals("didn't get the right document first", "21", d.get("id")); + + searcher.close(); + dir.close(); + } + + /* Open pre-lockless index, add docs, do a delete & + * setNorm, and search */ + public void changeIndexWithAdds(String dirName) throws IOException { + + Directory dir = FSDirectory.getDirectory(dirName, false); + // open writer + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); + + // add 10 docs +
for(int i=0;i<10;i++) { + addDoc(writer, 35+i); + } + + // make sure writer sees right total -- writer seems not to know about deletes in .del? + assertEquals("wrong doc count", 45, writer.docCount()); + writer.close(); + + // make sure searching sees right # hits + IndexSearcher searcher = new IndexSearcher(dir); + Hits hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 44, hits.length()); + Document d = hits.doc(0); + assertEquals("wrong first document", "21", d.get("id")); + searcher.close(); + + // make sure we can do another delete & another setNorm against this + // pre-lockless segment: + IndexReader reader = IndexReader.open(dir); + Term searchTerm = new Term("id", "6"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("wrong delete count", 1, delCount); + reader.setNorm(22, "content", (float) 2.0); + reader.close(); + + // make sure 2nd delete & 2nd norm "took": + searcher = new IndexSearcher(dir); + hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 43, hits.length()); + d = hits.doc(0); + assertEquals("wrong first document", "22", d.get("id")); + searcher.close(); + + // optimize + writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); + writer.optimize(); + writer.close(); + + searcher = new IndexSearcher(dir); + hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 43, hits.length()); + d = hits.doc(0); + assertEquals("wrong first document", "22", d.get("id")); + searcher.close(); + + dir.close(); + } + + /* Open pre-lockless index, add docs, do a delete & + * setNorm, and search */ + public void changeIndexNoAdds(String dirName) throws IOException { + + Directory dir = FSDirectory.getDirectory(dirName, false); + + // make sure searching sees right # hits + IndexSearcher searcher = new IndexSearcher(dir); + Hits hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 34, hits.length()); + Document d = hits.doc(0); + assertEquals("wrong first document", "21", d.get("id")); + searcher.close(); + + // make sure we can do another delete & another setNorm against this + // pre-lockless segment: + IndexReader reader = IndexReader.open(dir); + Term searchTerm = new Term("id", "6"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("wrong delete count", 1, delCount); + reader.setNorm(22, "content", (float) 2.0); + reader.close(); + + // make sure 2nd delete & 2nd norm "took": + searcher = new IndexSearcher(dir); + hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 33, hits.length()); + d = hits.doc(0); + assertEquals("wrong first document", "22", d.get("id")); + searcher.close(); + + // optimize + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); + writer.optimize(); + writer.close(); + + searcher = new IndexSearcher(dir); + hits = searcher.search(new TermQuery(new Term("content", "aaa"))); + assertEquals("wrong number of hits", 33, hits.length()); + d = hits.doc(0); + assertEquals("wrong first document", "22", d.get("id")); + searcher.close(); + + dir.close(); + } + + public void createIndex(String dirName, boolean doCFS) throws IOException { + + Directory dir = FSDirectory.getDirectory(dirName, true); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + writer.setUseCompoundFile(doCFS); + + for(int i=0;i<35;i++) { + 
addDoc(writer, i); + } + assertEquals("wrong doc count", 35, writer.docCount()); + writer.close(); + + // Delete one doc so we get a .del file: + IndexReader reader = IndexReader.open(dir); + Term searchTerm = new Term("id", "7"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("didn't delete the right number of documents", 1, delCount); + + // Set one norm so we get a .s0 file: + reader.setNorm(21, "content", (float) 1.5); + reader.close(); + + rmDir(dirName); + } + + /* Verifies that the expected file names were produced */ + + public void testExactFileNames() throws IOException { + + String outputDir = "lucene.backwardscompat0.index"; + Directory dir = FSDirectory.getDirectory(outputDir, true); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); + for(int i=0;i<35;i++) { + addDoc(writer, i); + } + assertEquals("wrong doc count", 35, writer.docCount()); + writer.close(); + + // Delete one doc so we get a .del file: + IndexReader reader = IndexReader.open(dir); + Term searchTerm = new Term("id", "7"); + int delCount = reader.deleteDocuments(searchTerm); + assertEquals("didn't delete the right number of documents", 1, delCount); + + // Set one norm so we get a .s0 file: + reader.setNorm(21, "content", (float) 1.5); + reader.close(); + + // Now verify file names: + String[] expected = {"_0.cfs", + "_0_1.del", + "_1.cfs", + "_2.cfs", + "_2_1.s0", + "_3.cfs", + "segments_a", + "segments.gen"}; + + String[] actual = dir.list(); + Arrays.sort(expected); + Arrays.sort(actual); + if (!Arrays.equals(expected, actual)) { + fail("incorrect filenames in index: expected:\n " + asString(expected) + "\n actual:\n " + asString(actual)); + } + + rmDir(outputDir); + } + + private String asString(String[] l) { + String s = ""; + for(int i=0;i<l.length;i++) { + if (i > 0) { + s += "\n "; + } + s += l[i]; + } + return s; + } + + private void addDoc(IndexWriter writer, int id) throws IOException + { + Document doc = new Document(); + doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED)); + doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.UN_TOKENIZED)); + writer.addDocument(doc); + } + + private void rmDir(String dir) { + File fileDir = new File(dir); + if (fileDir.exists()) { + File[] files = fileDir.listFiles(); + if (files != null) { + for (int i = 0; i < files.length; i++) { + files[i].delete(); + } + } + fileDir.delete(); + } + } +} \ No newline at end of file Index: src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- src/java/org/apache/lucene/index/MultiReader.java (revision 468583) +++ src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -217,6 +217,13 @@ return new MultiTermPositions(subReaders, starts); } + protected void setDeleter(IndexFileDeleter deleter) { + // Share deleter to our SegmentReaders: + this.deleter = deleter; + for (int i = 0; i < subReaders.length; i++) + subReaders[i].setDeleter(deleter); + } + protected void doCommit() throws IOException { for (int i = 0; i < subReaders.length; i++) subReaders[i].commit(); Index: src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- src/java/org/apache/lucene/index/IndexReader.java (revision 468583) +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -112,6 +112,7 @@ private Directory directory; private boolean directoryOwner; private boolean closeDirectory; + protected IndexFileDeleter deleter; private
SegmentInfos segmentInfos; private Lock writeLock; @@ -137,24 +138,40 @@ } private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException { - synchronized (directory) { // in- & inter-process sync - return (IndexReader)new Lock.With( - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), - IndexWriter.COMMIT_LOCK_TIMEOUT) { - public Object doBody() throws IOException { - SegmentInfos infos = new SegmentInfos(); - infos.read(directory); - if (infos.size() == 1) { // index is optimized - return SegmentReader.get(infos, infos.info(0), closeDirectory); + + return (IndexReader) new SegmentInfos.FindSegmentsFile(directory) { + + public Object doBody(String segmentFileName) throws IOException { + + SegmentInfos infos = new SegmentInfos(); + infos.read(directory, segmentFileName); + + if (infos.size() == 1) { // index is optimized + return SegmentReader.get(infos, infos.info(0), closeDirectory); + } else { + + // To reduce the chance of hitting FileNotFound + // (and having to retry), we open segments in + // reverse because IndexWriter merges & deletes + // the newest segments first. + + IndexReader[] readers = new IndexReader[infos.size()]; + for (int i = infos.size()-1; i >= 0; i--) { + try { + readers[i] = SegmentReader.get(infos.info(i)); + } catch (IOException e) { + // Close all readers we had opened: + for(i++;i<infos.size();i++) { + readers[i].close(); + } + throw e; + } + } + + return new MultiReader(directory, infos, closeDirectory, readers); + } + } + }.run(); } /** * Returns true if an index exists at the specified directory. If the directory does not exist or if there is no index in it, false is returned. * @param directory the directory to check for an index * @return true if an index exists; false otherwise */ public static boolean indexExists(String directory) { - return (new File(directory, IndexFileNames.SEGMENTS)).exists(); + return indexExists(new File(directory)); } /** @@ -327,8 +326,9 @@ * @param directory the directory to check for an index * @return true if an index exists; false otherwise */ + public static boolean indexExists(File directory) { - return (new File(directory, IndexFileNames.SEGMENTS)).exists(); + return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1; } /** @@ -339,7 +339,7 @@ * @throws IOException if there is a problem with accessing the index */ public static boolean indexExists(Directory directory) throws IOException { - return directory.fileExists(IndexFileNames.SEGMENTS); + return SegmentInfos.getCurrentSegmentGeneration(directory) != -1; } /** Returns the number of documents in this index. */ @@ -591,17 +591,22 @@ */ protected final synchronized void commit() throws IOException{ if(hasChanges){ + if (deleter == null) { + // In the MultiReader case, we share this deleter + // across all SegmentReaders: + setDeleter(new IndexFileDeleter(segmentInfos, directory)); + deleter.deleteFiles(); + } if(directoryOwner){ - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), - IndexWriter.COMMIT_LOCK_TIMEOUT) { - public Object doBody() throws IOException { - doCommit(); - segmentInfos.write(directory); - return null; - } - }.run(); - } + deleter.clearPendingFiles(); + doCommit(); + String oldInfoFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); + // Attempt to delete all files we just obsoleted: + + deleter.deleteFile(oldInfoFileName); + deleter.commitPendingFiles(); + deleter.deleteFiles(); if (writeLock != null) { writeLock.release(); // release write lock writeLock = null; @@ -613,6 +618,13 @@ hasChanges = false; } + protected void setDeleter(IndexFileDeleter deleter) { + this.deleter = deleter; + } + protected IndexFileDeleter getDeleter() { + return deleter; + } + /** Implements commit.
*/ protected abstract void doCommit() throws IOException; @@ -657,8 +669,7 @@ */ public static boolean isLocked(Directory directory) throws IOException { return - directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked() || - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).isLocked(); + directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked(); } /** @@ -683,7 +694,6 @@ */ public static void unlock(Directory directory) throws IOException { directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); - directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release(); } /** Index: src/java/org/apache/lucene/index/IndexFileNames.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileNames.java (revision 468583) +++ src/java/org/apache/lucene/index/IndexFileNames.java (working copy) @@ -26,19 +26,25 @@ /** Name of the index segment file */ static final String SEGMENTS = "segments"; + + /** Name of the generation reference file name */ + static final String SEGMENTS_GEN = "segments.gen"; - /** Name of the index deletable file */ + /** Name of the index deletable file (only used in + * pre-lockless indices) */ static final String DELETABLE = "deletable"; - + /** - * This array contains all filename extensions used by Lucene's index files, with - * one exception, namely the extension made up from .f + a number. - * Also note that two of Lucene's files (deletable and - * segments) don't have any filename extension. + * This array contains all filename extensions used by + * Lucene's index files, with two exceptions, namely the + * extension made up from .f + a number and + * from .s + a number. Also note that + * Lucene's segments_N files do not have any + * filename extension. */ static final String INDEX_EXTENSIONS[] = new String[] { "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", - "tvx", "tvd", "tvf", "tvp" }; + "tvx", "tvd", "tvf", "tvp", "gen"}; /** File extensions of old-style index files */ static final String COMPOUND_EXTENSIONS[] = new String[] { @@ -49,5 +55,24 @@ static final String VECTOR_EXTENSIONS[] = new String[] { "tvx", "tvd", "tvf" }; - + + /** + * Computes the full file name from base, extension and + * generation. If the generation is -1, the file name is + * null. If it's 0, the file name is <base><extension>. + * If it's > 0, the file name is <base>_<generation><extension>. + * + * @param base -- main part of the file name + * @param extension -- extension of the filename (including .) + * @param gen -- generation + */ + public static final String fileNameFromGeneration(String base, String extension, long gen) { + if (gen == -1) { + return null; + } else if (gen == 0) { + return base + extension; + } else { + return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension; + } + } } Index: src/java/org/apache/lucene/index/SegmentInfos.java =================================================================== --- src/java/org/apache/lucene/index/SegmentInfos.java (revision 468583) +++ src/java/org/apache/lucene/index/SegmentInfos.java (working copy) @@ -18,6 +18,9 @@ import java.util.Vector; import java.io.IOException; +import java.io.PrintStream; +import java.io.File; +import java.io.FileNotFoundException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -28,26 +31,138 @@ /** The file format version, a negative number.
*/ /* Works since counter, the old 1st entry, is always >= 0 */ public static final int FORMAT = -1; - + + /** This is the current file format written. It differs + * slightly from the previous format in that file names + * are never re-used (write once). Instead, each file is + * written to the next generation. For example, + * segments_1, segments_2, etc. This allows us to not use + * a commit lock. See file + * formats for details. + */ + public static final int FORMAT_LOCKLESS = -2; + public int counter = 0; // used to name new segments /** * counts how often the index has been changed by adding or deleting docs. * starting with the current time in milliseconds forces to create unique version numbers. */ private long version = System.currentTimeMillis(); + private long generation = 0; // generation of the "segments_N" file we read + /** + * If non-null, information about loading segments_N files + * will be printed here. @see #setInfoStream. + */ + private static PrintStream infoStream; + public final SegmentInfo info(int i) { return (SegmentInfo) elementAt(i); } - public final void read(Directory directory) throws IOException { - - IndexInput input = directory.openInput(IndexFileNames.SEGMENTS); + /** + * Get the generation (N) of the current segments_N file + * from a list of files. + * + * @param files -- array of file names to check + */ + public static long getCurrentSegmentGeneration(String[] files) { + if (files == null) { + return -1; + } + long max = -1; + int prefixLen = IndexFileNames.SEGMENTS.length()+1; + for (int i = 0; i < files.length; i++) { + String file = files[i]; + if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) { + if (file.equals(IndexFileNames.SEGMENTS)) { + // Pre lock-less commits: + if (max == -1) { + max = 0; + } + } else { + long v = Long.parseLong(file.substring(prefixLen), Character.MAX_RADIX); + if (v > max) { + max = v; + } + } + } + } + return max; + } + + /** + * Get the generation (N) of the current segments_N file + * in the directory. + * + * @param directory -- directory to search for the latest segments_N file + */ + public static long getCurrentSegmentGeneration(Directory directory) throws IOException { + String[] files = directory.list(); + if (files == null) + throw new IOException("Cannot read directory " + directory); + return getCurrentSegmentGeneration(files); + } + + /** + * Get the filename of the current segments_N file + * from a list of files. + * + * @param files -- array of file names to check + */ + + public static String getCurrentSegmentFileName(String[] files) throws IOException { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + getCurrentSegmentGeneration(files)); + } + + /** + * Get the filename of the current segments_N file + * in the directory. + * + * @param directory -- directory to search for the latest segments_N file + */ + public static String getCurrentSegmentFileName(Directory directory) throws IOException { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + getCurrentSegmentGeneration(directory)); + } + + /** + * Get the segment_N filename in use by this segment infos. + */ + public String getCurrentSegmentFileName() { + return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + generation); + } + + /** + * Read a particular segmentFileName. Note that this may + * throw an IOException if a commit is in process. 
+ * + * @param directory -- directory containing the segments file + * @param segmentFileName -- segment file to load + */ + public final void read(Directory directory, String segmentFileName) throws IOException { + boolean success = false; + + IndexInput input = directory.openInput(segmentFileName); + + if (segmentFileName.equals(IndexFileNames.SEGMENTS)) { + generation = 0; + } else { + generation = Long.parseLong(segmentFileName.substring(1+IndexFileNames.SEGMENTS.length()), + Character.MAX_RADIX); + } + try { int format = input.readInt(); if(format < 0){ // file contains explicit format info // check that it is a format we can understand - if (format < FORMAT) + if (format < FORMAT_LOCKLESS) throw new IOException("Unknown format version: " + format); version = input.readLong(); // read version counter = input.readInt(); // read counter @@ -57,9 +172,7 @@ } for (int i = input.readInt(); i > 0; i--) { // read segmentInfos - SegmentInfo si = - new SegmentInfo(input.readString(), input.readInt(), directory); - addElement(si); + addElement(new SegmentInfo(directory, format, input)); } if(format >= 0){ // in old format the version number may be at the end of the file @@ -68,31 +181,70 @@ else version = input.readLong(); // read version } + success = true; } finally { input.close(); + if (!success) { + // Clear any segment infos we had loaded so we + // have a clean slate on retry: + clear(); + } } } + /** + * This version of read uses the retry logic (for lock-less + * commits) to find the right segments file to load. + */ + public final void read(Directory directory) throws IOException { + generation = -1; + + new FindSegmentsFile(directory) { + + public Object doBody(String segmentFileName) throws IOException { + read(directory, segmentFileName); + return null; + } + }.run(); + } + public final void write(Directory directory) throws IOException { - IndexOutput output = directory.createOutput("segments.new"); + + // Always advance the generation on write: + if (generation == -1) { + generation = 1; + } else { + generation++; + } + + String segmentFileName = getCurrentSegmentFileName(); + IndexOutput output = directory.createOutput(segmentFileName); + try { - output.writeInt(FORMAT); // write FORMAT - output.writeLong(++version); // every write changes the index + output.writeInt(FORMAT_LOCKLESS); // write FORMAT + output.writeLong(++version); // every write changes + // the index output.writeInt(counter); // write counter output.writeInt(size()); // write infos for (int i = 0; i < size(); i++) { SegmentInfo si = info(i); - output.writeString(si.name); - output.writeInt(si.docCount); + si.write(output); } } finally { output.close(); } - // install new segment info - directory.renameFile("segments.new", IndexFileNames.SEGMENTS); + try { + output = directory.createOutput(IndexFileNames.SEGMENTS_GEN); + output.writeLong(generation); + output.writeLong(generation); + output.close(); + } catch (IOException e) { + // It's OK if we fail to write this file since it's + // used only as one of the retry fallbacks. 
+ } } /** @@ -107,30 +259,266 @@ */ public static long readCurrentVersion(Directory directory) throws IOException { + + return ((Long) new FindSegmentsFile(directory) { + public Object doBody(String segmentFileName) throws IOException { + + IndexInput input = directory.openInput(segmentFileName); + + int format = 0; + long version = 0; + try { + format = input.readInt(); + if(format < 0){ + if (format < FORMAT_LOCKLESS) + throw new IOException("Unknown format version: " + format); + version = input.readLong(); // read version + } + } + finally { + input.close(); + } + + if(format < 0) + return new Long(version); + + // We cannot be sure about the format of the file. + // Therefore we have to read the whole file and cannot simply seek to the version entry. + SegmentInfos sis = new SegmentInfos(); + sis.read(directory, segmentFileName); + return new Long(sis.getVersion()); + } + }.run()).longValue(); + } + + /** If non-null, information about retries when loading + * the segments file will be printed to this: + */ + public static void setInfoStream(PrintStream infoStream) { + SegmentInfos.infoStream = infoStream; + } + + /** + * @see #setInfoStream + */ + public static PrintStream getInfoStream() { + return infoStream; + } + + private static void message(String message) { + if (infoStream != null) { + infoStream.println(Thread.currentThread().getName() + ": " + message); + } + } + + /** + * Utility class for executing code that needs to do + * something with the current segments file. This is + * necessary with lock-less commits because from the time + * you locate the current segments file name, until you + * actually open it, read its contents, or check modified + * time, etc., it could have been deleted due to a writer + * commit finishing. + */ + public abstract static class FindSegmentsFile { + + File fileDirectory; + Directory directory; + + public FindSegmentsFile(File directory) { + this.fileDirectory = directory; + } + + public FindSegmentsFile(Directory directory) { + this.directory = directory; + } + + public Object run() throws IOException { + String segmentFileName = null; + long lastGen = -1; + long gen = 0; + int genLookaheadCount = 0; + IOException exc = null; + boolean retry = false; + + int method = 0; + + // Loop until we succeed in calling doBody() without + // hitting an IOException. An IOException most likely + // means a commit was in process and has finished, in + // the time it took us to load the now-old infos files + // (and segments files). It's also possible it's a + // true error (corrupt index). To distinguish these, + // on each retry we must see "forward progress" on + // which generation we are trying to load. If we + // don't, then the original error is real and we throw + // it. - IndexInput input = directory.openInput(IndexFileNames.SEGMENTS); - int format = 0; - long version = 0; - try { - format = input.readInt(); - if(format < 0){ - if (format < FORMAT) - throw new IOException("Unknown format version: " + format); - version = input.readLong(); // read version + // We have three methods for determining the current + // generation. We try each in sequence. + + while(true) { + + // Method 1: list the directory and use the highest + // segments_N file. 
This method works well as long + as there is no stale caching on the directory + contents: + String[] files = null; + + if (0 == method) { + if (directory != null) { + files = directory.list(); + } else { + files = fileDirectory.list(); + } + + gen = getCurrentSegmentGeneration(files); + + if (gen == -1) { + String s = ""; + for(int i=0;i<files.length;i++) { + s += " " + files[i]; + } + throw new FileNotFoundException("no segments* file found: files:" + s); + } + } + + // Method 2 (fallback if Method 1 isn't reliable): + // if the directory listing seems to be stale, + // then try loading the segments.gen file: + if (1 == method || (0 == method && lastGen == gen && retry)) { + + method = 1; + + for(int i=0;i<2;i++) { + + IndexInput genInput = null; + try { + genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN); + } catch (IOException e) { + message("segments.gen open: IOException " + e); + } + + if (genInput != null) { + try { + long gen0 = genInput.readLong(); + long gen1 = genInput.readLong(); + message("fallback check: " + gen0 + "; " + gen1); + if (gen0 == gen1) { + // The file is consistent: + if (gen0 > gen) { + message("fallback to '" + IndexFileNames.SEGMENTS_GEN + "' check: now try generation " + gen0 + " > " + gen); + gen = gen0; + } + break; + } + } catch (IOException err2) { + // will retry + } finally { + genInput.close(); + } + } + try { + // LOCKLESS TODO: make this 50 msec configurable/settable + Thread.sleep(50); + } catch (InterruptedException e) { + // will retry + } + } + } + + // Method 3 (fallback if Methods 1 & 2 are not + // reliable): since both directory cache and file + // contents cache seem to be stale, just advance the + // generation. + if (2 == method || (1 == method && lastGen == gen && retry)) { + + method = 2; + + // LOCKLESS TODO: make this 10 max lookahead configurable/settable + if (genLookaheadCount < 10) { + gen++; + genLookaheadCount++; + message("look ahead increment gen to " + gen); + } + } + + if (lastGen == gen) { + + // This means we're about to try the same + // segments_N last tried. This is allowed, + // exactly once, because writer could have been in + // the process of writing segments_N last time. + + if (retry) { + // OK, we've tried the same segments_N file + // twice in a row, so this must be a real + // error. We throw the original exception we + // got. + throw exc; + } else { + retry = true; + } + + } else { + // Segment file has advanced since our last loop, so + // reset retry: + retry = false; + } + + lastGen = gen; + + segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen); + + try { + Object v = doBody(segmentFileName); + if (exc != null) { + message("success on " + segmentFileName); + } + return v; + } catch (IOException err) { + + // Save the original root cause: + if (exc == null) { + exc = err; + } + + message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen); + + if (!retry && gen > 1) { + + // This is our first time trying this segments + // file (because retry is false), and, there is + // possibly a segments_(N-1) (because gen > 1). + // So, check if the segments_(N-1) exists and + // try it if so: + String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + gen-1); + + if (directory.fileExists(prevSegmentFileName)) { + message("fallback to prior segment file '" + prevSegmentFileName + "'"); + try { + Object v = doBody(prevSegmentFileName); + if (exc != null) { + message("success on fallback " + prevSegmentFileName); + } + return v; + } catch (IOException err2) { + message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); + } + } + } } } } - finally { - input.close(); - } - - if(format < 0) - return version; - // We cannot be sure about the format of the file. - // Therefore we have to read the whole file and cannot simply seek to the version entry. - - SegmentInfos sis = new SegmentInfos(); - sis.read(directory); - return sis.getVersion(); - } + /** + * Subclass must implement this. The assumption is an + * IOException will be thrown if something goes wrong + * during the processing that could have been caused by + * a writer committing.
+ */ + protected abstract Object doBody(String segmentFileName) throws IOException;} } Index: src/java/org/apache/lucene/index/IndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/IndexWriter.java (revision 468583) +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) @@ -66,16 +66,7 @@ private long writeLockTimeout = WRITE_LOCK_TIMEOUT; - /** - * Default value for the commit lock timeout (10,000). - * @see #setDefaultCommitLockTimeout - */ - public static long COMMIT_LOCK_TIMEOUT = 10000; - - private long commitLockTimeout = COMMIT_LOCK_TIMEOUT; - public static final String WRITE_LOCK_NAME = "write.lock"; - public static final String COMMIT_LOCK_NAME = "commit.lock"; /** * Default value is 10. Change using {@link #setMergeFactor(int)}. @@ -110,6 +101,7 @@ private SegmentInfos segmentInfos = new SegmentInfos(); // the segments private SegmentInfos ramSegmentInfos = new SegmentInfos(); // the segments in ramDirectory private final Directory ramDirectory = new RAMDirectory(); // for temp segs + private IndexFileDeleter deleter; private Lock writeLock; @@ -258,17 +250,28 @@ throw new IOException("Index locked for write: " + writeLock); this.writeLock = writeLock; // save it - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - if (create) - segmentInfos.write(directory); - else - segmentInfos.read(directory); - return null; - } - }.run(); + if (create) { + // Try to read first. This is to allow create + // against an index that's currently open for + // searching. In this case we write the next + // segments_N file with no segments: + try { + segmentInfos.read(directory); + segmentInfos.clear(); + } catch (IOException e) { + // Likely this means it's a fresh directory + } + segmentInfos.write(directory); + } else { + segmentInfos.read(directory); } + + // Create a deleter to keep track of which files can + // be deleted: + deleter = new IndexFileDeleter(segmentInfos, directory); + deleter.setInfoStream(infoStream); + deleter.findDeletableFiles(); + deleter.deleteFiles(); } /** Determines the largest number of documents ever merged by addDocument(). @@ -373,35 +376,6 @@ } /** - * Sets the maximum time to wait for a commit lock (in milliseconds) for this instance of IndexWriter. @see - * @see #setDefaultCommitLockTimeout to change the default value for all instances of IndexWriter. - */ - public void setCommitLockTimeout(long commitLockTimeout) { - this.commitLockTimeout = commitLockTimeout; - } - - /** - * @see #setCommitLockTimeout - */ - public long getCommitLockTimeout() { - return commitLockTimeout; - } - - /** - * Sets the default (for any instance of IndexWriter) maximum time to wait for a commit lock (in milliseconds) - */ - public static void setDefaultCommitLockTimeout(long commitLockTimeout) { - IndexWriter.COMMIT_LOCK_TIMEOUT = commitLockTimeout; - } - - /** - * @see #setDefaultCommitLockTimeout - */ - public static long getDefaultCommitLockTimeout() { - return IndexWriter.COMMIT_LOCK_TIMEOUT; - } - - /** * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter. 
*/ @@ -509,7 +483,7 @@ String segmentName = newRAMSegmentName(); dw.addDocument(segmentName, doc); synchronized (this) { - ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory)); + ramSegmentInfos.addElement(new SegmentInfo(segmentName, 1, ramDirectory, false)); maybeFlushRamSegments(); } } @@ -782,36 +756,26 @@ int docCount = merger.merge(); // merge 'em segmentInfos.setSize(0); // pop old infos & add new - segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory)); + SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false); + segmentInfos.addElement(info); if(sReader != null) sReader.close(); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - segmentInfos.write(directory); // commit changes - return null; - } - }.run(); - } + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); // commit changes - deleteSegments(segmentsToDelete); // delete now-unused segments + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { - final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - // make compound file visible for SegmentReaders - directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); - return null; - } - }.run(); - } + Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + info.setUseCompoundFile(true); + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file - // delete now unused files of segment - deleteFiles(filesToDelete); + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteFiles(filesToDelete); // delete now unused files of segment } } @@ -929,10 +893,11 @@ */ private final int mergeSegments(SegmentInfos sourceSegments, int minSegment, int end) throws IOException { + final String mergedName = newSegmentName(); if (infoStream != null) infoStream.print("merging segments"); SegmentMerger merger = new SegmentMerger(this, mergedName); - + final Vector segmentsToDelete = new Vector(); for (int i = minSegment; i < end; i++) { SegmentInfo si = sourceSegments.info(i); @@ -952,7 +917,7 @@ } SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory); + directory, false); if (sourceSegments == ramSegmentInfos) { sourceSegments.removeAllElements(); segmentInfos.addElement(newSegment); @@ -965,115 +930,26 @@ // close readers before we attempt to delete now-obsolete segments merger.closeReaders(); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - segmentInfos.write(directory); // commit before deleting - return null; - } - }.run(); - } - - deleteSegments(segmentsToDelete); // delete now-unused segments + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + segmentInfos.write(directory); // commit before deleting + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + 
deleter.deleteSegments(segmentsToDelete); // delete now-unused segments + if (useCompoundFile) { - final Vector filesToDelete = merger.createCompoundFile(mergedName + ".tmp"); - synchronized (directory) { // in- & inter-process sync - new Lock.With(directory.makeLock(COMMIT_LOCK_NAME), commitLockTimeout) { - public Object doBody() throws IOException { - // make compound file visible for SegmentReaders - directory.renameFile(mergedName + ".tmp", mergedName + ".cfs"); - return null; - } - }.run(); - } + Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); - // delete now unused files of segment - deleteFiles(filesToDelete); + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + newSegment.setUseCompoundFile(true); + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteFiles(filesToDelete); // delete now-unused segments } return mergedDocCount; } - /* - * Some operating systems (e.g. Windows) don't permit a file to be deleted - * while it is opened for read (e.g. by another process or thread). So we - * assume that when a delete fails it is because the file is open in another - * process, and queue the file for subsequent deletion. - */ - - private final void deleteSegments(Vector segments) throws IOException { - Vector deletable = new Vector(); - - deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable - - for (int i = 0; i < segments.size(); i++) { - SegmentReader reader = (SegmentReader)segments.elementAt(i); - if (reader.directory() == this.directory) - deleteFiles(reader.files(), deletable); // try to delete our files - else - deleteFiles(reader.files(), reader.directory()); // delete other files - } - - writeDeleteableFiles(deletable); // note files we can't delete - } - - private final void deleteFiles(Vector files) throws IOException { - Vector deletable = new Vector(); - deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable - deleteFiles(files, deletable); // try to delete our files - writeDeleteableFiles(deletable); // note files we can't delete - } - - private final void deleteFiles(Vector files, Directory directory) - throws IOException { - for (int i = 0; i < files.size(); i++) - directory.deleteFile((String)files.elementAt(i)); - } - - private final void deleteFiles(Vector files, Vector deletable) - throws IOException { - for (int i = 0; i < files.size(); i++) { - String file = (String)files.elementAt(i); - try { - directory.deleteFile(file); // try to delete each file - } catch (IOException e) { // if delete fails - if (directory.fileExists(file)) { - if (infoStream != null) - infoStream.println(e.toString() + "; Will re-try later."); - deletable.addElement(file); // add to deletable - } - } - } - } - - private final Vector readDeleteableFiles() throws IOException { - Vector result = new Vector(); - if (!directory.fileExists(IndexFileNames.DELETABLE)) - return result; - - IndexInput input = directory.openInput(IndexFileNames.DELETABLE); - try { - for (int i = input.readInt(); i > 0; i--) // read file names - result.addElement(input.readString()); - } finally { - input.close(); - } - return result; - } - - private final void writeDeleteableFiles(Vector files) throws IOException { - IndexOutput output = directory.createOutput("deleteable.new"); - try { - output.writeInt(files.size()); - for (int i = 0; i < files.size(); i++) - 
output.writeString((String)files.elementAt(i)); - } finally { - output.close(); - } - directory.renameFile("deleteable.new", IndexFileNames.DELETABLE); - } - private final boolean checkNonDecreasingLevels(int start) { int lowerBound = -1; int upperBound = minMergeDocs; Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 0) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 0) @@ -0,0 +1,182 @@ +package org.apache.lucene.index; + +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.IndexFileNameFilter; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.store.Directory; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.Vector; +import java.util.HashMap; + +/** + * A utility class (used by both IndexReader and + * IndexWriter) to keep track of files that need to be + * deleted because they are no longer referenced by the + * index. + */ +public class IndexFileDeleter { + private Vector deletable; + private Vector pending; + private Directory directory; + private SegmentInfos segmentInfos; + private PrintStream infoStream; + + public IndexFileDeleter(SegmentInfos segmentInfos, Directory directory) + throws IOException { + this.segmentInfos = segmentInfos; + this.directory = directory; + } + + void setInfoStream(PrintStream infoStream) { + this.infoStream = infoStream; + } + + /** Determine index files that are no longer referenced + * and therefore should be deleted. This is called once + * (by the writer), and then subsequently we add onto + * deletable any files that are no longer needed at the + * point that we create the unused file (eg when merging + * segments), and we only remove from deletable when a + * file is successfully deleted. + */ + + public void findDeletableFiles() throws IOException { + + // Gather all "current" segments: + HashMap current = new HashMap(); + for(int j=0;j.cfs exists) + public SegmentInfo(String name, int docCount, Directory dir) { this.name = name; this.docCount = docCount; this.dir = dir; + delGen = -1; + isCompoundFile = 0; + preLockless = true; } + public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile) { + this(name, docCount, dir); + if (isCompoundFile) { + this.isCompoundFile = 1; + } else { + this.isCompoundFile = -1; + } + preLockless = false; + } + + + /** + * Construct a new SegmentInfo instance by reading a + * previously saved SegmentInfo from input. 
+ * + * @param dir directory to load from + * @param format format of the segments info file + * @param input input handle to read segment info from + */ + public SegmentInfo(Directory dir, int format, IndexInput input) throws IOException { + this.dir = dir; + name = input.readString(); + docCount = input.readInt(); + if (format <= SegmentInfos.FORMAT_LOCKLESS) { + delGen = input.readLong(); + int numNormGen = input.readInt(); + if (numNormGen == -1) { + normGen = null; + } else { + normGen = new long[numNormGen]; + for(int j=0;j<numNormGen;j++) { + normGen[j] = input.readLong(); + } + } + isCompoundFile = input.readByte(); + preLockless = isCompoundFile == 0; + } else { + delGen = 0; + normGen = null; + isCompoundFile = 0; + preLockless = true; + } + } + + void write(IndexOutput output) throws IOException { + output.writeString(name); + output.writeInt(docCount); + output.writeLong(delGen); + if (normGen == null) { + output.writeInt(-1); + } else { + output.writeInt(normGen.length); + for(int j=0;j<normGen.length;j++) { + output.writeLong(normGen[j]); + } + } + output.writeByte(isCompoundFile); + } + + boolean hasDeletions() + throws IOException { + // Cases: + // + // delGen == -1: this means this segment was written + // by the LOCKLESS code and for certain has no + // deletions yet + // + // delGen == 0: this means this segment was written by + // pre-LOCKLESS code which means we must check + // directory to see if .del file exists + // + // delGen > 0: this means this segment was written by + // the LOCKLESS code and for certain has + // deletions + // + if (delGen == -1) { + return false; + } else if (delGen > 0) { + return true; + } else { + return dir.fileExists(getDelFileName()); + } + } + + void advanceDelGen() { + // delGen 0 is reserved for pre-LOCKLESS format + if (delGen == -1) { + delGen = 1; + } else { + delGen++; + } + } + + void clearDelGen() { + delGen = -1; + } + + String getDelFileName() { + if (delGen == -1) { + // In this case we know there is no deletion filename + // against this segment + return null; + } else { + // If delGen is 0, it's the pre-lockless-commit file format + return IndexFileNames.fileNameFromGeneration(name, ".del", delGen); + } + } + + /** + * Returns true if this field for this segment has saved a separate norms file (_<segment>_N.sX). + * + * @param fieldNumber the field index to check + */ + boolean hasSeparateNorms(int fieldNumber) + throws IOException { + if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == 0)) { + // Must fallback to directory file exists check: + String fileName = name + ".s" + fieldNumber; + return dir.fileExists(fileName); + } else if (normGen == null || normGen[fieldNumber] == -1) { + return false; + } else { + return true; + } + } + + /** + * Returns true if any fields in this segment have separate norms. + */ + boolean hasSeparateNorms() + throws IOException { + if (normGen == null) { + if (!preLockless) { + // This means we were created w/ LOCKLESS code and no + // norms are written yet: + return false; + } else { + // This means this segment was saved with pre-LOCKLESS + // code. So we must fallback to the original + // directory list check: + String[] result = dir.list(); + String pattern; + pattern = name + ".s"; + int patternLength = pattern.length(); + for(int i = 0; i < result.length; i++){ + if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength))) + return true; + } + return false; + } + } else { + // This means this segment was saved with LOCKLESS + // code so we first check whether any normGen's are > + // 0 (meaning they definitely have separate norms): + for(int i=0;i<normGen.length;i++) { + if (normGen[i] > 0) { + return true; + } + } + // Next we look for any == 0. These cases were + // pre-LOCKLESS and must be checked in directory: + for(int i=0;i<normGen.length;i++) { + if (normGen[i] == 0) { + if (hasSeparateNorms(i)) { + return true; + } + } + } + return false; + } + } + + void advanceNormGen(int fieldIndex) { + if (normGen[fieldIndex] == -1) { + normGen[fieldIndex] = 1; + } else { + normGen[fieldIndex]++; + } + } + + /** + * Get the file name for the norms file for this field. + * + * @param number field index + */ + String getNormFileName(int number) throws IOException { + long gen; + if (normGen == null) { + gen = 0; + } else { + gen = normGen[number]; + } + if (hasSeparateNorms(number)) { + // Case 1: separate norm file for this field: + return IndexFileNames.fileNameFromGeneration(name, ".s" + number, gen); + } + // Case 2: norm file stored with the segment itself: + return IndexFileNames.fileNameFromGeneration(name, ".f" + number, 0); + } + + void setUseCompoundFile(boolean isCompoundFile) { + if (isCompoundFile) { + this.isCompoundFile = 1; + } else { + this.isCompoundFile = -1; + } + } + + boolean getUseCompoundFile() throws IOException { + if (isCompoundFile == -1) { + return false; + } else if (isCompoundFile == 1) { + return true; + } else { + return dir.fileExists(name + ".cfs"); + } + } +} Index: docs/fileformats.html =================================================================== --- docs/fileformats.html (revision 468583) +++ docs/fileformats.html (working copy)
This document defines the index file formats used - in Lucene version 2.0. If you are using a different + in Lucene version XXX. If you are using a different version of Lucene, please consult the copy of docs/fileformats.html that was distributed with the version you are using. @@ -141,6 +141,15 @@ Compatibility notes are provided in this document, describing how file formats have changed from prior versions.

+

+ In version XXX, the file format was changed to allow + lock-less commits. The change is fully backwards + compatible: you can open a pre-XXX index for searching + or adding/deleting of docs. When the new segments + file is saved (committed), it will be written in the + new file format (meaning no specific "upgrade" process + is needed). +
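To make the compatibility check concrete, here is a small illustrative sketch -- not part of the patch, with FormatSniffer as a hypothetical name -- of telling the two layouts apart by the leading format int; the constants mirror SegmentInfos.FORMAT (-1) and FORMAT_LOCKLESS (-2) defined in the patch above:

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

class FormatSniffer {
  static final int FORMAT = -1;          // explicit format, pre-lockless (Lucene 1.4+)
  static final int FORMAT_LOCKLESS = -2; // lock-less commits

  // Returns true if the given segments file uses the lock-less layout.
  // A leading int >= 0 means the very old format with no format header.
  static boolean isLockless(String segmentsFile) throws IOException {
    DataInputStream in = new DataInputStream(new FileInputStream(segmentsFile));
    try {
      int format = in.readInt();
      if (format < FORMAT_LOCKLESS) {
        throw new IOException("Unknown format version: " + format);
      }
      return format == FORMAT_LOCKLESS;
    } finally {
      in.close();
    }
  }
}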

@@ -402,6 +411,15 @@ in an index are stored in a single directory, although this is not required.

+

+ As of version XXX (lock-less commits), file names are + never re-used. That is, when any file is saved to the + Directory it is given a never before used filename. + This is achieved using a simple generations approach. + For example, the first segments file is segments_1, + then segments_2, etc. The generation is a sequential + long integer represented in alpha-numeric (base 36) form. +
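The naming scheme is compact enough to show in full. The logic below mirrors the fileNameFromGeneration helper this patch adds to IndexFileNames; only the wrapper class and main are illustrative. Base 36 is also why the testExactFileNames test above expects a file named segments_a (generation 10):

class SegmentFileNames {
  // Mirrors IndexFileNames.fileNameFromGeneration from this patch:
  static String fileNameFromGeneration(String base, String extension, long gen) {
    if (gen == -1) {
      return null;                 // no such file
    } else if (gen == 0) {
      return base + extension;     // pre-lockless name, e.g. "segments"
    } else {
      // Base 36 (Character.MAX_RADIX) keeps generation suffixes short:
      return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension;
    }
  }

  public static void main(String[] args) {
    System.out.println(fileNameFromGeneration("segments", "", 1));  // segments_1
    System.out.println(fileNameFromGeneration("segments", "", 10)); // segments_a
    System.out.println(fileNameFromGeneration("_2", ".s0", 1));     // _2_1.s0
  }
}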

@@ -1078,27 +1096,53 @@

The active segments in the index are stored in the - segment info file. An index only has - a single file in this format, and it is named "segments". - This lists each segment by name, and also contains the size of each - segment. + segment info file, segments_N. There may + be one or more segments_N files in the + index; however, the one with the largest + generation is the active one (when older + segments_N files are present it's because they + temporarily cannot be deleted, or a writer is in + the process of committing). This file lists each + segment by name, has details about the separate + norms and deletion files, and also contains the + size of each segment.
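Because a writer can commit (and delete the old segments_N) between a directory listing and the subsequent open, callers go through the SegmentInfos.FindSegmentsFile helper introduced earlier in this patch. A minimal usage sketch follows -- the class name is hypothetical, and it lives in org.apache.lucene.index because SegmentInfos is package-private:

package org.apache.lucene.index;

import java.io.IOException;
import org.apache.lucene.store.Directory;

class CurrentVersionExample {
  // Returns the index version from whichever segments_N file could
  // actually be read; run() retries if a concurrent commit races us.
  static long currentVersion(final Directory dir) throws IOException {
    return ((Long) new SegmentInfos.FindSegmentsFile(dir) {
      public Object doBody(String segmentFileName) throws IOException {
        SegmentInfos infos = new SegmentInfos();
        infos.read(dir, segmentFileName); // may throw if the file just vanished
        return new Long(infos.getVersion());
      }
    }.run()).longValue();
  }
}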

+ As of XXX, there is also a file + segments.gen. This file contains the + current generation (the _N in + segments_N) of the index. This is + recorded only as a fallback in case the current + generation cannot be accurately determined by + directory listing alone (as is the case for some + NFS clients with time-based directory cache + expiration). This file simply contains the + generation recorded as Int64, written twice. +
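The double write makes a torn segments.gen detectable: the two values only agree if the file was written completely. A hedged sketch of the consistency check (SegmentsGenReader is a hypothetical stand-in; the patch itself performs this check inside FindSegmentsFile using IndexInput rather than plain java.io):

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;

class SegmentsGenReader {
  // Returns the generation recorded in segments.gen, or -1 if the
  // file is missing, truncated, or inconsistent:
  static long readGeneration(String path) {
    try {
      DataInputStream in = new DataInputStream(new FileInputStream(path));
      try {
        long gen0 = in.readLong();
        long gen1 = in.readLong();
        return (gen0 == gen1) ? gen0 : -1;
      } finally {
        in.close();
      }
    } catch (IOException e) {
      return -1; // fall back to directory listing or generation lookahead
    }
  }
}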

+

+ Pre-XXX: Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>^SegCount

- Format, NameCounter, SegCount, SegSize --> UInt32 + XXX and above: + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen^NumField>^SegCount, IsCompoundFile

- Version --> UInt64 + Format, NameCounter, SegCount, SegSize, NumField --> UInt32

+ Version, DelGen, NormGen --> UInt64 +

+

SegName --> String

- Format is -1 in Lucene 1.4. + IsCompoundFile --> Int8

+ Format is -1 as of Lucene 1.4 and -2 as of XXX. +

+

Version counts how often the index has been changed by adding or deleting documents.
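Assuming the lock-less layout (Format of -2), the grammar above can be walked with Lucene's IndexInput primitives in the same order the patch's SegmentInfo(Directory, int, IndexInput) constructor uses. SegmentsDumper is an illustrative name, and a NumField of -1 simply means no per-field norm generations were recorded:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;

class SegmentsDumper {
  static void dump(Directory dir, String segmentFileName) throws IOException {
    IndexInput in = dir.openInput(segmentFileName);
    try {
      int format = in.readInt();     // Format: -2 for lock-less commits
      long version = in.readLong();  // Version
      int counter = in.readInt();    // NameCounter
      int segCount = in.readInt();   // SegCount
      System.out.println("format=" + format + " version=" + version +
                         " counter=" + counter + " segments=" + segCount);
      for (int i = 0; i < segCount; i++) {
        String name = in.readString();  // SegName
        int docCount = in.readInt();    // SegSize
        long delGen = in.readLong();    // DelGen
        int numField = in.readInt();    // NumField (-1: no NormGen entries)
        for (int j = 0; j < numField; j++) {
          in.readLong();                // NormGen, one per field
        }
        byte isCompoundFile = in.readByte(); // IsCompoundFile
        System.out.println("  " + name + " docs=" + docCount +
                           " delGen=" + delGen + " cfs=" + isCompoundFile);
      }
    } finally {
      in.close();
    }
  }
}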

@@ -1134,15 +1178,20 @@
  • - When a file named "commit.lock" - is present, a process is currently re-writing the "segments" - file and deleting outdated segment index files, or a process is - reading the "segments" - file and opening the files of the segments it names. This lock file - prevents files from being deleted by another process after a process - has read the "segments" - file but before it has managed to open all of the files of the - segments named therein. + When a file named "commit.lock" is + present, a process is currently re-writing + the "segments" file and deleting outdated + segment index files, or a process is + reading the "segments" file and opening + the files of the segments it names. This + lock file prevents files from being + deleted by another process after a process + has read the "segments" file but before it + has managed to open all of the files of + the segments named therein. As of XXX, + the "commit.lock" is no longer used + because readers are able to open an index + even while a writer is committing.

  • @@ -1168,11 +1217,15 @@

    - A file named "deletable" - contains the names of files that are no longer used by the index, but - which could not be deleted. This is only used on Win32, where a - file may not be deleted while it is still open. On other platforms - the file contains only null bytes. + A file named "deletable" contains the names of + files that are no longer used by the index, but + which could not be deleted. This is only used on + Win32, where a file may not be deleted while it is + still open. On other platforms the file contains + only null bytes. As of version XXX, there is no + deletable file. Instead, the deletable files are + computed on creating a writer and then maintained + as an in-memory vector.

    Deletable --> DeletableCount, <DelableName>^DeletableCount Index: xdocs/fileformats.xml =================================================================== --- xdocs/fileformats.xml (revision 468583) +++ xdocs/fileformats.xml (working copy) @@ -14,7 +14,7 @@

    This document defines the index file formats used - in Lucene version 2.0. If you are using a different + in Lucene version XXX. If you are using a different version of Lucene, please consult the copy of docs/fileformats.html that was distributed with the version you are using. @@ -43,6 +43,16 @@ describing how file formats have changed from prior versions.

    +

    + In version XXX, the file format was changed to allow + lock-less commits. The change is fully backwards + compatible: you can open a pre-XXX index for searching + or adding/deleting of docs. When the new segments + file is saved (committed), it will be written in the + new file format (meaning no specific "upgrade" process + is needed). +

    +
    @@ -260,6 +270,16 @@ required.

    +

    + As of version XXX (lock-less commits), file names are + never re-used. That is, when any file is saved to the + Directory it is given a never before used filename. + This is achieved using a simple generations approach. + For example, the first segments file is segments_1, + then segments_2, etc. The generation is a sequential + long integer represented in alpha-numeric (base 36) form. +

    +
    @@ -696,22 +716,46 @@

    The active segments in the index are stored in the - segment info file. An index only has - a single file in this format, and it is named "segments". - This lists each segment by name, and also contains the size of each - segment. + segment info file, segments_N. There may + be one or more segments_N files in the + index; however, the one with the largest + generation is the active one (when older + segments_N files are present it's because they + temporarily cannot be deleted, or a writer is in + the process of committing). This file lists each + segment by name, has details about the separate + norms and deletion files, and also contains the + size of each segment.

    +

    + As of XXX, there is also a file + segments.gen. This file contains the + current generation (the _N in + segments_N) of the index. This is + recorded only as a fallback in case the current + generation cannot be accurately determined by + directory listing alone (as is the case for some + NFS clients with time-based directory cache + expiration). This file simply contains the + generation recorded as Int64, written twice. +

    +

    + Pre-XXX: Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize>^SegCount

    +

    + XXX and above: + Segments --> Format, Version, NameCounter, SegCount, <SegName, SegSize, DelGen, NumField, NormGen^NumField>^SegCount, IsCompoundFile +

    - Format, NameCounter, SegCount, SegSize --> UInt32 + Format, NameCounter, SegCount, SegSize, NumField --> UInt32

    - Version --> UInt64 + Version, DelGen, NormGen --> UInt64

    @@ -719,10 +763,14 @@

    - Format is -1 in Lucene 1.4. + IsCompoundFile --> Int8

    + Format is -1 as of Lucene 1.4 and -2 as of XXX. +

    + +

    Version counts how often the index has been changed by adding or deleting documents.

    @@ -756,15 +804,20 @@
    • - When a file named "commit.lock" - is present, a process is currently re-writing the "segments" - file and deleting outdated segment index files, or a process is - reading the "segments" - file and opening the files of the segments it names. This lock file - prevents files from being deleted by another process after a process - has read the "segments" - file but before it has managed to open all of the files of the - segments named therein. + When a file named "commit.lock" is + present, a process is currently re-writing + the "segments" file and deleting outdated + segment index files, or a process is + reading the "segments" file and opening + the files of the segments it names. This + lock file prevents files from being + deleted by another process after a process + has read the "segments" file but before it + has managed to open all of the files of + the segments named therein. As of XXX, + the "commit.lock" is no longer used + because readers are able to open an index + even while a writer is committing.

    • @@ -782,11 +835,15 @@

      - A file named "deletable" - contains the names of files that are no longer used by the index, but - which could not be deleted. This is only used on Win32, where a - file may not be deleted while it is still open. On other platforms - the file contains only null bytes. + A file named "deletable" contains the names of + files that are no longer used by the index, but + which could not be deleted. This is only used on + Win32, where a file may not be deleted while it is + still open. On other platforms the file contains + only null bytes. As of version XXX, there is no + deletable file. Instead, the deletable files are + computed on creating a writer and then maintained + as an in-memory vector.
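To make that bookkeeping concrete, a small sketch of the try-delete-else-queue pattern follows; DeletableTracker is a hypothetical stand-in for the patch's IndexFileDeleter, which additionally coordinates pending files across commits:

import java.io.IOException;
import java.util.Vector;
import org.apache.lucene.store.Directory;

class DeletableTracker {
  private final Vector deletable = new Vector(); // names we could not delete yet
  private final Directory directory;

  DeletableTracker(Directory directory) {
    this.directory = directory;
  }

  // Try to delete now; if the delete fails while the file still exists
  // (e.g. held open by a reader on Win32), queue it for a later pass:
  void deleteFile(String file) throws IOException {
    try {
      directory.deleteFile(file);
    } catch (IOException e) {
      if (directory.fileExists(file)) {
        deletable.addElement(file);
      }
    }
  }

  // Retry everything queued so far, keeping whatever still fails:
  void retryPending() throws IOException {
    Vector still = new Vector();
    for (int i = 0; i < deletable.size(); i++) {
      String file = (String) deletable.elementAt(i);
      try {
        directory.deleteFile(file);
      } catch (IOException e) {
        if (directory.fileExists(file)) {
          still.addElement(file);
        }
      }
    }
    deletable.removeAllElements();
    deletable.addAll(still);
  }
}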