Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 485730)
+++ CHANGES.txt (working copy)
@@ -240,6 +240,15 @@
"The handle is invalid" IOExceptions on Windows when trying to
close readers or writers. (Michael Busch via Mike McCandless).
+26. LUCENE-702: Fix IndexWriter.addIndexes(*) to not corrupt the index
+ on any exceptions (eg disk full). The semantics of these methods
+    are now transactional: either all indices are merged or none are.
+ Also fixed IndexWriter.mergeSegments (called outside of
+ addIndexes(*) by addDocument, optimize, flushRamSegments) and
+ IndexReader.commit() (called by close) to clean up and keep the
+    instance state consistent with what's actually in the index (Mike
+ McCandless).
+
Optimizations
1. LUCENE-586: TermDocs.skipTo() is now more efficient for
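For illustration only (not part of this patch): the transactional guarantee described in entry 26 above means a caller can rely on the following behavior. The variable names "writer" and "otherDir" are hypothetical.

    // Hypothetical usage sketch; IndexWriter and Directory are real Lucene
    // classes, the variable names are made up for illustration:
    try {
      writer.addIndexes(new Directory[] { otherDir });
      // all segments from otherDir were merged into writer's index
    } catch (IOException e) {
      // e.g. disk full: either none of otherDir's segments were added or
      // all of them were; the target index is not corrupted (though it may
      // still have been optimized -- see the IndexWriter javadoc changes below)
    }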
Index: src/test/org/apache/lucene/store/MockRAMOutputStream.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 0)
+++ src/test/org/apache/lucene/store/MockRAMOutputStream.java (revision 0)
@@ -0,0 +1,83 @@
+package org.apache.lucene.store;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * Used by MockRAMDirectory to create an output stream that
+ * will throw an IOException on fake disk full, track max
+ * disk space actually used, and maybe throw random
+ * IOExceptions.
+ */
+
+public class MockRAMOutputStream extends RAMOutputStream {
+ private MockRAMDirectory dir;
+ private boolean first=true;
+
+ /** Construct an empty output buffer. */
+ public MockRAMOutputStream(MockRAMDirectory dir, RAMFile f) {
+ super(f);
+ this.dir = dir;
+ }
+
+ public void close() throws IOException {
+ super.close();
+
+ // Now compute actual disk usage & track the maxUsedSize
+ // in the MockRAMDirectory:
+ long size = dir.getRecomputedActualSizeInBytes();
+ if (size > dir.maxUsedSize) {
+ dir.maxUsedSize = size;
+ }
+ }
+
+ public void flushBuffer(byte[] src, int len) throws IOException {
+ long freeSpace = dir.maxSize - dir.sizeInBytes();
+ long realUsage = 0;
+
+ // Enforce disk full:
+ if (dir.maxSize != 0 && freeSpace <= len) {
+ // Compute the real disk free. This will greatly slow
+ // down our test but makes it more accurate:
+ realUsage = dir.getRecomputedActualSizeInBytes();
+ freeSpace = dir.maxSize - realUsage;
+ }
+
+ if (dir.maxSize != 0 && freeSpace <= len) {
+ if (freeSpace > 0 && freeSpace < len) {
+ realUsage += freeSpace;
+ super.flushBuffer(src, (int) freeSpace);
+ }
+ if (realUsage > dir.maxUsedSize) {
+ dir.maxUsedSize = realUsage;
+ }
+ throw new IOException("fake disk full at " + dir.sizeInBytes() + " bytes");
+ } else {
+ super.flushBuffer(src, len);
+ }
+
+ if (first) {
+ // Maybe throw random exception; only do this on first
+ // write to a new file:
+ first = false;
+ dir.maybeThrowIOException();
+ }
+ }
+}
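A minimal sketch of how this class is expected to be exercised (a hypothetical JUnit-style test method; the file name and the 1 KB size limit are made up for illustration):

  public void testFakeDiskFull() throws IOException {
    MockRAMDirectory dir = new MockRAMDirectory();
    dir.setMaxSizeInBytes(1024);                 // pretend the "disk" holds only 1 KB
    IndexOutput out = dir.createOutput("_0.test");
    byte[] bytes = new byte[4096];
    try {
      out.writeBytes(bytes, bytes.length);       // overflows the 1 KB limit
      out.close();
      fail("did not hit fake disk full");
    } catch (IOException e) {
      // expected: "fake disk full at ... bytes" thrown by flushBuffer above
    }
  }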
Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java (revision 0)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java (revision 0)
@@ -0,0 +1,130 @@
+package org.apache.lucene.store;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import java.util.Iterator;
+import java.util.Random;
+
+/**
+ * This is a subclass of RAMDirectory that adds methods
+ * intended to be used only by unit tests.
+ * @version $Id: RAMDirectory.java 437897 2006-08-29 01:13:10Z yonik $
+ */
+
+public class MockRAMDirectory extends RAMDirectory {
+ long maxSize;
+
+ // Max actual bytes used. This is set by MockRAMOutputStream:
+ long maxUsedSize;
+ double randomIOExceptionRate;
+ Random randomState;
+
+ public MockRAMDirectory() throws IOException {
+ super();
+ }
+ public MockRAMDirectory(String dir) throws IOException {
+ super(dir);
+ }
+ public MockRAMDirectory(Directory dir) throws IOException {
+ super(dir);
+ }
+ public MockRAMDirectory(File dir) throws IOException {
+ super(dir);
+ }
+
+ public void setMaxSizeInBytes(long maxSize) {
+ this.maxSize = maxSize;
+ }
+ public long getMaxSizeInBytes() {
+ return this.maxSize;
+ }
+
+ /**
+   * Returns the peak actual storage used (in bytes) in this
+ * directory.
+ */
+ public long getMaxUsedSizeInBytes() {
+ return this.maxUsedSize;
+ }
+ public void resetMaxUsedSizeInBytes() {
+ this.maxUsedSize = getRecomputedActualSizeInBytes();
+ }
+
+ /**
+   * If 0.0, no exceptions will be thrown.  Otherwise this
+   * should be a double between 0.0 and 1.0: with this
+   * probability, an IOException will be thrown on the
+   * first write to each output stream.
+ */
+ public void setRandomIOExceptionRate(double rate, long seed) {
+ randomIOExceptionRate = rate;
+ // seed so we have deterministic behaviour:
+ randomState = new Random(seed);
+ }
+ public double getRandomIOExceptionRate() {
+ return randomIOExceptionRate;
+ }
+
+ void maybeThrowIOException() throws IOException {
+ if (randomIOExceptionRate > 0.0) {
+ int number = Math.abs(randomState.nextInt() % 1000);
+ if (number < randomIOExceptionRate*1000) {
+ throw new IOException("a random IOException");
+ }
+ }
+ }
+
+ public IndexOutput createOutput(String name) {
+ RAMFile file = new RAMFile(this);
+ synchronized (this) {
+ RAMFile existing = (RAMFile)fileMap.get(name);
+ if (existing!=null) {
+ sizeInBytes -= existing.sizeInBytes;
+ existing.directory = null;
+ }
+ fileMap.put(name, file);
+ }
+
+ return new MockRAMOutputStream(this, file);
+ }
+
+ /** Provided for testing purposes. Use sizeInBytes() instead. */
+ public synchronized final long getRecomputedSizeInBytes() {
+ long size = 0;
+ Iterator it = files.iterator();
+ while (it.hasNext())
+ size += ((RAMFile) it.next()).getSizeInBytes();
+ return size;
+ }
+
+  /** Like getRecomputedSizeInBytes(), but uses actual file
+   * lengths rather than buffer allocations, which are
+   * quantized up to the nearest
+   * BufferedIndexOutput.BUFFER_SIZE (currently 1024) bytes.
+ */
+
+ final long getRecomputedActualSizeInBytes() {
+ long size = 0;
+ Iterator it = files.iterator();
+ while (it.hasNext())
+ size += ((RAMFile) it.next()).length;
+ return size;
+ }
+}
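As a rough usage sketch (the seed directory "startDir", the byte budget "diskFree", and the code being exercised are placeholders), the two failure knobs above are meant to be combined like this:

  MockRAMDirectory dir = new MockRAMDirectory(startDir);  // copy of a pre-built index
  dir.setMaxSizeInBytes(diskFree);             // simulate a nearly full disk
  dir.setRandomIOExceptionRate(0.05, 42);      // ~5% chance of failing the first write to each file
  try {
    // ... open an IndexWriter or IndexReader on dir and exercise it ...
  } catch (IOException e) {
    // either "fake disk full at ..." or "a random IOException"
  }
  dir.setMaxSizeInBytes(0);                    // 0 means no limit
  dir.setRandomIOExceptionRate(0.0, 0);        // disable random failures and retry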
Index: src/test/org/apache/lucene/index/TestIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexReader.java (revision 485632)
+++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy)
@@ -30,11 +30,18 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TermQuery;
+
import java.util.Collection;
+import java.util.Arrays;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.File;
+import org.apache.lucene.store.MockRAMDirectory;
+
public class TestIndexReader extends TestCase
{
/** Main for running test case by itself. */
@@ -547,7 +554,213 @@
public void testDeleteReaderReaderConflictOptimized() throws IOException{
deleteReaderReaderConflict(true);
}
+
+ /**
+   * Make sure that if the reader hits disk full while
+   * committing, it remains consistent and usable.
+ */
+ public void testDiskFull() throws IOException {
+
+ boolean debug = false;
+ Term searchTerm = new Term("content", "aaa");
+ int START_COUNT = 157;
+ int END_COUNT = 144;
+
+ // First build up a starting index:
+ RAMDirectory startDir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(startDir, new WhitespaceAnalyzer(), true);
+ for(int i=0;i<157;i++) {
+ Document d = new Document();
+ d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.UN_TOKENIZED));
+ d.add(new Field("content", "aaa " + i, Field.Store.NO, Field.Index.TOKENIZED));
+ writer.addDocument(d);
+ }
+ writer.close();
+
+ long diskUsage = startDir.sizeInBytes();
+ long diskFree = diskUsage+100;
+
+ IOException err = null;
+
+ boolean done = false;
+
+ // Iterate w/ ever increasing free disk space:
+ while(!done) {
+ MockRAMDirectory dir = new MockRAMDirectory(startDir);
+ IndexReader reader = IndexReader.open(dir);
+
+ // For each disk size, first try to commit against
+ // dir that will hit random IOExceptions & disk
+ // full; after, give it infinite disk space & turn
+ // off random IOExceptions & retry w/ same reader:
+ boolean success = false;
+
+ for(int x=0;x<2;x++) {
+
+ double rate = 0.05;
+ double diskRatio = ((double) diskFree)/diskUsage;
+ long thisDiskFree;
+ String testName;
+
+ if (0 == x) {
+ thisDiskFree = diskFree;
+ if (diskRatio >= 2.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 4.0) {
+ rate /= 2;
+ }
+ if (diskRatio >= 6.0) {
+ rate = 0.0;
+ }
+ if (debug) {
+ System.out.println("\ncycle: " + diskFree + " bytes");
+ }
+ testName = "disk full during reader.close() @ " + thisDiskFree + " bytes";
+ } else {
+ thisDiskFree = 0;
+ rate = 0.0;
+ if (debug) {
+ System.out.println("\ncycle: same writer: unlimited disk space");
+ }
+ testName = "reader re-use after disk full";
+ }
+
+ dir.setMaxSizeInBytes(thisDiskFree);
+ dir.setRandomIOExceptionRate(rate, diskFree);
+
+ try {
+ if (0 == x) {
+ int docId = 12;
+ for(int i=0;i<13;i++) {
+ reader.deleteDocument(docId);
+ reader.setNorm(docId, "contents", (float) 2.0);
+ docId += 12;
+ }
+ }
+ reader.close();
+ success = true;
+ if (0 == x) {
+ done = true;
+ }
+ } catch (IOException e) {
+ if (debug) {
+ System.out.println(" hit IOException: " + e);
+ }
+ err = e;
+ if (1 == x) {
+ e.printStackTrace();
+ fail(testName + " hit IOException after disk space was freed up");
+ }
+ }
+
+ // Whether we succeeded or failed, check that all
+ // un-referenced files were in fact deleted (ie,
+ // we did not create garbage). Just create a
+ // new IndexFileDeleter, have it delete
+ // unreferenced files, then verify that in fact
+ // no files were deleted:
+ String[] startFiles = dir.list();
+ SegmentInfos infos = new SegmentInfos();
+ infos.read(dir);
+ IndexFileDeleter d = new IndexFileDeleter(infos, dir);
+ d.findDeletableFiles();
+ d.deleteFiles();
+ String[] endFiles = dir.list();
+
+ Arrays.sort(startFiles);
+ Arrays.sort(endFiles);
+
+ //for(int i=0;i
+   * If an Exception is hit during optimize() (eg, due to
+   * disk full), the index will not be corrupted.  However
+   * it's possible that one of the segments in the index
+   * will be in non-CFS format even when using compound file
+   * format.  This will occur when the Exception is hit
+   * during conversion of the segment into compound
+   * format.
+   */
   public synchronized void optimize() throws IOException {
     flushRamSegments();
     while (segmentInfos.size() > 1 ||
@@ -579,6 +607,80 @@
     }
   }
+  /*
+   * Begin a transaction.  During a transaction, any segment
+   * merges that happen (or ram segments flushed) will not
+   * write a new segments file and will not remove any files
+   * that were present at the start of the transaction.  You
+   * must make a matched (try/finally) call to
+   * commitTransaction() or rollbackTransaction() to finish
+   * the transaction.
+   */
+  private void startTransaction() throws IOException {
+    if (inTransaction) {
+      throw new IOException("transaction is already in process");
+    }
+    rollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
+    protectedSegments = new HashSet();
+    for(int i=0;i
-   * After this completes, the index is optimized. */
+   *
+   * After this completes, the index is optimized.
+   *
+   * This method is transactional in how Exceptions are
+   * handled: it does not commit a new segments_N file until
+   * all indexes are added.  This means if an Exception
+   * occurs (eg disk full), then either no indexes will have
+   * been added or they all will have been.
+   *
+   * If an Exception is hit, it's still possible that all
+   * indexes were successfully added.  This happens when the
+   * Exception is hit when trying to build a CFS file.  In
+   * this case, one segment in the index will be in non-CFS
+   * format, even when using compound file format.
+   *
+   * Also note that on an exception, the index may still
+   * have been partially or fully optimized even though none
+   * of the input indexes were added.
+   *
+   * Note that this requires temporary free space in the
+   * Directory up to 2X the sum of all input indexes
+   * (including the starting index).  Exact usage could be
+   * less but will depend on many factors.
+   *
+   * See LUCENE-702
+   * for details.
+ */ public synchronized void addIndexes(Directory[] dirs) - throws IOException { + throws IOException { + optimize(); // start with zero or 1 seg int start = segmentInfos.size(); - for (int i = 0; i < dirs.length; i++) { - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - segmentInfos.addElement(sis.info(j)); // add each info + boolean success = false; + + startTransaction(); + + try { + for (int i = 0; i < dirs.length; i++) { + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + segmentInfos.addElement(sis.info(j)); // add each info + } } - } - // merge newly added segments in log(n) passes - while (segmentInfos.size() > start+mergeFactor) { - for (int base = start; base < segmentInfos.size(); base++) { - int end = Math.min(segmentInfos.size(), base+mergeFactor); - if (end-base > 1) - mergeSegments(segmentInfos, base, end); + // merge newly added segments in log(n) passes + while (segmentInfos.size() > start+mergeFactor) { + for (int base = start; base < segmentInfos.size(); base++) { + int end = Math.min(segmentInfos.size(), base+mergeFactor); + if (end-base > 1) { + mergeSegments(segmentInfos, base, end); + } + } } + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); + } } optimize(); // final cleanup @@ -623,6 +766,11 @@ ** This requires this index not be among those to be added, and the * upper bound* of those segment doc counts not exceed maxMergeDocs. + * + *
+   * See {@link #addIndexes(Directory[])} for
+   * details on transactional semantics, temporary free
+   * space required in the Directory, and non-CFS segments
+   * on an Exception.
*/ public synchronized void addIndexesNoOptimize(Directory[] dirs) throws IOException { @@ -651,96 +799,114 @@ // and target may use compound file or not. So we use mergeSegments() to // copy a segment, which may cause doc count to change because deleted // docs are garbage collected. - // - // In current addIndexes(Directory[]), segment infos in S are added to - // T's "segmentInfos" upfront. Then segments in S are merged to T several - // at a time. Every merge is committed with T's "segmentInfos". So if - // a reader is opened on T while addIndexes() is going on, it could see - // an inconsistent index. AddIndexesNoOptimize() has a similar behaviour. // 1 flush ram segments + flushRamSegments(); // 2 copy segment infos and find the highest level from dirs int start = segmentInfos.size(); int startUpperBound = minMergeDocs; + boolean success = false; + + startTransaction(); + try { - for (int i = 0; i < dirs.length; i++) { - if (directory == dirs[i]) { - // cannot add this index: segments may be deleted in merge before added - throw new IllegalArgumentException("Cannot add this index to itself"); - } - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - SegmentInfo info = sis.info(j); - segmentInfos.addElement(info); // add each info + try { + for (int i = 0; i < dirs.length; i++) { + if (directory == dirs[i]) { + // cannot add this index: segments may be deleted in merge before added + throw new IllegalArgumentException("Cannot add this index to itself"); + } - while (startUpperBound < info.docCount) { - startUpperBound *= mergeFactor; // find the highest level from dirs - if (startUpperBound > maxMergeDocs) { - // upper bound cannot exceed maxMergeDocs - throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dirs[i]); + for (int j = 0; j < sis.size(); j++) { + SegmentInfo info = sis.info(j); + segmentInfos.addElement(info); // add each info + + while (startUpperBound < info.docCount) { + startUpperBound *= mergeFactor; // find the highest level from dirs + if (startUpperBound > maxMergeDocs) { + // upper bound cannot exceed maxMergeDocs + throw new IllegalArgumentException("Upper bound cannot exceed maxMergeDocs"); + } } } } + } catch (IllegalArgumentException e) { + for (int i = segmentInfos.size() - 1; i >= start; i--) { + segmentInfos.remove(i); + } + throw e; } - } catch (IllegalArgumentException e) { - for (int i = segmentInfos.size() - 1; i >= start; i--) { - segmentInfos.remove(i); - } - throw e; - } - // 3 maybe merge segments starting from the highest level from dirs - maybeMergeSegments(startUpperBound); + // 3 maybe merge segments starting from the highest level from dirs + maybeMergeSegments(startUpperBound); - // get the tail segments whose levels <= h - int segmentCount = segmentInfos.size(); - int numTailSegments = 0; - while (numTailSegments < segmentCount - && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { - numTailSegments++; - } - if (numTailSegments == 0) { - return; - } - - // 4 make sure invariants hold for the tail segments whose levels <= h - if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { - // identify the segments from S to be copied (not merged in 3) - int numSegmentsToCopy = 0; - while (numSegmentsToCopy < segmentCount - && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { - numSegmentsToCopy++; + // get the 
tail segments whose levels <= h + int segmentCount = segmentInfos.size(); + int numTailSegments = 0; + while (numTailSegments < segmentCount + && startUpperBound >= segmentInfos.info(segmentCount - 1 - numTailSegments).docCount) { + numTailSegments++; } - if (numSegmentsToCopy == 0) { + if (numTailSegments == 0) { + success = true; return; } - // copy those segments from S - for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { - mergeSegments(segmentInfos, i, i + 1); + // 4 make sure invariants hold for the tail segments whose levels <= h + if (checkNonDecreasingLevels(segmentCount - numTailSegments)) { + // identify the segments from S to be copied (not merged in 3) + int numSegmentsToCopy = 0; + while (numSegmentsToCopy < segmentCount + && directory != segmentInfos.info(segmentCount - 1 - numSegmentsToCopy).dir) { + numSegmentsToCopy++; + } + if (numSegmentsToCopy == 0) { + success = true; + return; + } + + // copy those segments from S + for (int i = segmentCount - numSegmentsToCopy; i < segmentCount; i++) { + mergeSegments(segmentInfos, i, i + 1); + } + if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { + success = true; + return; + } } - if (checkNonDecreasingLevels(segmentCount - numSegmentsToCopy)) { - return; + + // invariants do not hold, simply merge those segments + mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); + + // maybe merge segments again if necessary + if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { + maybeMergeSegments(startUpperBound * mergeFactor); } - } - // invariants do not hold, simply merge those segments - mergeSegments(segmentInfos, segmentCount - numTailSegments, segmentCount); - - // maybe merge segments again if necessary - if (segmentInfos.info(segmentInfos.size() - 1).docCount > startUpperBound) { - maybeMergeSegments(startUpperBound * mergeFactor); + success = true; + } finally { + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); + } } } /** Merges the provided indexes into this index. *After this completes, the index is optimized.
*The provided IndexReaders are not closed.
+ + *See {@link #addIndexes(Directory[])} for + * details on transactional semantics, temporary free + * space required in the Directory, and non-CFS segments + * on an Exception.
*/ public synchronized void addIndexes(IndexReader[] readers) throws IOException { @@ -761,27 +927,62 @@ for (int i = 0; i < readers.length; i++) // add new indexes merger.add(readers[i]); - int docCount = merger.merge(); // merge 'em + SegmentInfo info; - segmentInfos.setSize(0); // pop old infos & add new - SegmentInfo info = new SegmentInfo(mergedName, docCount, directory, false); - segmentInfos.addElement(info); + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - if(sReader != null) + boolean success = false; + + startTransaction(); + + try { + int docCount = merger.merge(); // merge 'em + + segmentInfos.setSize(0); // pop old infos & add new + info = new SegmentInfo(mergedName, docCount, directory, false); + segmentInfos.addElement(info); + commitPending = true; + + if(sReader != null) sReader.close(); - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit changes + success = true; + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteSegments(segmentsToDelete); // delete now-unused segments if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + success = false; + segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - info.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + Vector filesToDelete; + startTransaction(); + + try { + + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + + info.setUseCompoundFile(true); + commitPending = true; + success = true; + + } finally { + if (!success) { + rollbackTransaction(); + } else { + commitTransaction(); + } + } + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file deleter.deleteFiles(filesToDelete); // delete now unused files of segment } @@ -884,6 +1085,7 @@ // mergeFactor and/or maxBufferedDocs change(s) while (numSegments >= mergeFactor) { // merge the leftmost* mergeFactor segments + int docCount = mergeSegments(segmentInfos, minSegment, minSegment + mergeFactor); numSegments -= mergeFactor; @@ -921,51 +1123,154 @@ SegmentMerger merger = new SegmentMerger(this, mergedName); final Vector segmentsToDelete = new Vector(); - for (int i = minSegment; i < end; i++) { - SegmentInfo si = sourceSegments.info(i); - if (infoStream != null) - infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); - IndexReader reader = SegmentReader.get(si); - merger.add(reader); - if ((reader.directory() == this.directory) || // if we own the directory - (reader.directory() == this.ramDirectory)) - segmentsToDelete.addElement(reader); // queue segment for deletion - } - int mergedDocCount = merger.merge(); + String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); + String nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - if (infoStream != null) { - infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + SegmentInfo newSegment = null; + + int mergedDocCount; + + // This is try/finally to make sure merger's readers are closed: + try { + + for (int i = minSegment; i < end; i++) { + SegmentInfo si = sourceSegments.info(i); + if (infoStream != null) + infoStream.print(" " + si.name + " (" + si.docCount + " docs)"); + IndexReader reader = SegmentReader.get(si); + merger.add(reader); + if ((reader.directory() == 
this.directory) || // if we own the directory + (reader.directory() == this.ramDirectory)) + segmentsToDelete.addElement(reader); // queue segment for deletion + } + + SegmentInfos rollback = null; + boolean success = false; + + // This is try/finally to rollback our internal state + // if we hit exception when doing the merge: + try { + + mergedDocCount = merger.merge(); + + if (infoStream != null) { + infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)"); + } + + newSegment = new SegmentInfo(mergedName, mergedDocCount, + directory, false); + + + if (sourceSegments == ramSegmentInfos) { + segmentInfos.addElement(newSegment); + } else { + + if (!inTransaction) { + // Now save the SegmentInfo instances that + // we are replacing: + rollback = (SegmentInfos) segmentInfos.clone(); + } + + for (int i = end-1; i > minSegment; i--) // remove old infos & add new + sourceSegments.remove(i); + + segmentInfos.set(minSegment, newSegment); + } + + if (!inTransaction) { + segmentInfos.write(directory); // commit before deleting + } else { + commitPending = true; + } + + success = true; + + } finally { + + if (success) { + // The non-ram-segments case is already committed + // (above), so all the remains for ram segments case + // is to clear the ram segments: + if (sourceSegments == ramSegmentInfos) { + ramSegmentInfos.removeAllElements(); + } + } else if (!inTransaction) { + + // Must rollback so our state matches index: + + if (sourceSegments == ramSegmentInfos) { + // Simple case: newSegment may or may not have + // been added to the end of our segment infos, + // so just check & remove if so: + if (newSegment != null && + segmentInfos.size() > 0 && + segmentInfos.info(segmentInfos.size()-1) == newSegment) { + segmentInfos.remove(segmentInfos.size()-1); + } + } else if (rollback != null) { + // Rollback the individual SegmentInfo + // instances, but keep original SegmentInfos + // instance (so we don't try to write again the + // same segments_N file -- write once): + segmentInfos.clear(); + segmentInfos.addAll(rollback); + } + + // Delete any partially created files: + deleter.deleteFile(nextSegmentsFileName); + deleter.findDeletableFiles(); + deleter.deleteFiles(); + } + } + } finally { + // close readers before we attempt to delete now-obsolete segments + merger.closeReaders(); } - SegmentInfo newSegment = new SegmentInfo(mergedName, mergedDocCount, - directory, false); - if (sourceSegments == ramSegmentInfos) { - sourceSegments.removeAllElements(); - segmentInfos.addElement(newSegment); + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete); // delete now-unused segments } else { - for (int i = end-1; i > minSegment; i--) // remove old infos & add new - sourceSegments.remove(i); - segmentInfos.set(minSegment, newSegment); + deleter.addPendingFile(segmentsInfosFileName); // delete old segments_N file + deleter.deleteSegments(segmentsToDelete, protectedSegments); // delete now-unused segments } - // close readers before we attempt to delete now-obsolete segments - merger.closeReaders(); + if (useCompoundFile) { - String segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - segmentInfos.write(directory); // commit before deleting + segmentsInfosFileName = nextSegmentsFileName; + nextSegmentsFileName = segmentInfos.getNextSegmentFileName(); - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file - deleter.deleteSegments(segmentsToDelete); // delete now-unused 
segments + Vector filesToDelete; - if (useCompoundFile) { - Vector filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + boolean success = false; - segmentsInfosFileName = segmentInfos.getCurrentSegmentFileName(); - newSegment.setUseCompoundFile(true); - segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + try { - deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + filesToDelete = merger.createCompoundFile(mergedName + ".cfs"); + newSegment.setUseCompoundFile(true); + if (!inTransaction) { + segmentInfos.write(directory); // commit again so readers know we've switched this segment to a compound file + } + success = true; + + } finally { + if (!success && !inTransaction) { + // Must rollback: + newSegment.setUseCompoundFile(false); + deleter.deleteFile(mergedName + ".cfs"); + deleter.deleteFile(nextSegmentsFileName); + } + } + + if (!inTransaction) { + deleter.deleteFile(segmentsInfosFileName); // delete old segments_N file + } + + // We can delete these segments whether or not we are + // in a transaction because we had just written them + // above so they can't need protection by the + // transaction: deleter.deleteFiles(filesToDelete); // delete now-unused segments } Index: src/java/org/apache/lucene/index/IndexFileDeleter.java =================================================================== --- src/java/org/apache/lucene/index/IndexFileDeleter.java (revision 485632) +++ src/java/org/apache/lucene/index/IndexFileDeleter.java (working copy) @@ -26,6 +26,8 @@ import java.io.PrintStream; import java.util.Vector; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; /** * A utility class (used by both IndexReader and @@ -35,7 +37,7 @@ */ public class IndexFileDeleter { private Vector deletable; - private Vector pending; + private HashSet pending; private Directory directory; private SegmentInfos segmentInfos; private PrintStream infoStream; @@ -45,6 +47,12 @@ this.segmentInfos = segmentInfos; this.directory = directory; } + void setSegmentInfos(SegmentInfos segmentInfos) { + this.segmentInfos = segmentInfos; + } + SegmentInfos getSegmentInfos() { + return segmentInfos; + } void setInfoStream(PrintStream infoStream) { this.infoStream = infoStream; @@ -134,6 +142,10 @@ // This is an orphan'd separate norms file: doDelete = true; } + } else if ("cfs".equals(extension) && !info.getUseCompoundFile()) { + // This is a partially written + // _segmentName.cfs: + doDelete = true; } } } @@ -167,6 +179,30 @@ deleteFiles(reader.files(), reader.directory()); // delete other files } } + + /** + * Delete these segments, as long as they are not listed + * in protectedSegments. If they are, then, instead, add + * them to the pending set. 
+ */ + + public final void deleteSegments(Vector segments, HashSet protectedSegments) throws IOException { + + deleteFiles(); // try to delete files that we couldn't before + + for (int i = 0; i < segments.size(); i++) { + SegmentReader reader = (SegmentReader)segments.elementAt(i); + if (reader.directory() == this.directory) { + if (protectedSegments.contains(reader.getSegmentName())) { + addPendingFiles(reader.files()); // record these for deletion on commit + } else { + deleteFiles(reader.files()); // try to delete our files + } + } else { + deleteFiles(reader.files(), reader.directory()); // delete other files + } + } + } public final void deleteFiles(Vector files, Directory directory) throws IOException { @@ -199,22 +235,51 @@ pending = null; } + /* + Record that the files for these segments should be + deleted, once the pending deletes are committed. + */ + final void addPendingSegments(Vector segments) throws IOException { + for (int i = 0; i < segments.size(); i++) { + SegmentReader reader = (SegmentReader)segments.elementAt(i); + if (reader.directory() == this.directory) { + addPendingFiles(reader.files()); + } + } + } + + /* + Record list of files for deletion, but do not delete + them until commitPendingFiles is called. + */ + final void addPendingFiles(Vector files) { + for(int i=0;i
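For readers trying to follow the IndexWriter hunks above, this is the shape of the transaction pattern they introduce around addIndexes(*), addIndexesNoOptimize and mergeSegments (a paraphrase, not a literal excerpt of the patch):

  boolean success = false;
  startTransaction();           // clone segmentInfos and protect the current segment files
  try {
    // ... merge or copy segments; writing a new segments_N file is
    // deferred (commitPending) and obsolete files are only queued ...
    success = true;
  } finally {
    if (success) {
      commitTransaction();      // write segments_N once and delete the queued files
    } else {
      rollbackTransaction();    // restore the cloned segmentInfos and remove partial files
    }
  }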