Index: src/test/org/apache/lucene/store/MockRAMDirectory.java
===================================================================
--- src/test/org/apache/lucene/store/MockRAMDirectory.java	(revision 549637)
+++ src/test/org/apache/lucene/store/MockRAMDirectory.java	(working copy)
@@ -24,6 +24,7 @@
 import java.util.Random;
 import java.util.Map;
 import java.util.HashMap;
+import java.util.ArrayList;
 
 /**
  * This is a subclass of RAMDirectory that adds methods
@@ -116,6 +117,7 @@
   }
 
   void maybeThrowIOException() throws IOException {
+    maybeThrowDeterministicException();
     if (randomIOExceptionRate > 0.0) {
       int number = Math.abs(randomState.nextInt() % 1000);
       if (number < randomIOExceptionRate*1000) {
@@ -213,4 +215,59 @@
       }
     }
   }
+
+  /**
+   * Objects that represent failable conditions. Objects of a derived
+   * class are created and registered with the mock directory. After
+   * registration, each object will be invoked once for each first write
+   * of a file, giving the object a chance to throw an IOException.
+   */
+  public static class Failure {
+    /**
+     * eval is called on the first write of every new file.
+     */
+    public void eval(MockRAMDirectory dir) throws IOException { }
+
+    /**
+     * reset should set the state of the failure to its default
+     * (freshly constructed) state. Reset is convenient for tests
+     * that want to create one failure object and then reuse it in
+     * multiple cases. This, combined with the fact that Failure
+     * subclasses are often anonymous classes, makes reset difficult
+     * to do otherwise.
+     *
+     * A typical example of use is
+     * Failure failure = new Failure() { ... };
+     * ...
+     * mock.failOn(failure.reset())
+     */
+    public Failure reset() { return this; }
+  }
+
+  ArrayList failures;
+
+  /**
+   * Add a Failure object to the list of objects to be evaluated
+   * at every potential failure point.
+   */
+  public void failOn(Failure fail) {
+    if (failures == null) {
+      failures = new ArrayList();
+    }
+    failures.add(fail);
+  }
+
+  /**
+   * Iterate through the failures list, giving each object a
+   * chance to throw an IOException.
+   */
+  void maybeThrowDeterministicException() throws IOException {
+    if (failures != null) {
+      for (int i = 0; i < failures.size(); i++) {
+        ((Failure) failures.get(i)).eval(this);
+      }
+    }
+  }
+
+}
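The javadoc's "typical example of use" above expands to roughly the following sketch (illustrative only, not part of the patch; the one-shot failure condition is an arbitrary choice):

    // Hypothetical usage of the Failure hook added above: fail exactly once,
    // on the first write evaluated after the failure is registered.
    MockRAMDirectory dir = new MockRAMDirectory();
    MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
      boolean failed = false;
      public void eval(MockRAMDirectory d) throws IOException {
        if (!failed) {
          failed = true;
          throw new IOException("simulated one-shot write failure");
        }
      }
      public MockRAMDirectory.Failure reset() {
        failed = false;
        return this;
      }
    };
    dir.failOn(failure.reset());
    // The first write into dir now throws; subsequent writes succeed,
    // so a test can exercise recovery/retry behavior deterministically.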
Index: src/test/org/apache/lucene/index/TestIndexWriterDelete.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriterDelete.java	(revision 549637)
+++ src/test/org/apache/lucene/index/TestIndexWriterDelete.java	(working copy)
@@ -19,6 +19,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.lang.StackTraceElement;
 
 import junit.framework.TestCase;
 
@@ -534,6 +535,344 @@
     }
   }
 
+  // This test verifies that buffered deletes are not lost due to I/O
+  // errors occurring after the buffered deletes have been flushed but
+  // before the segmentInfos have been successfully written.
+  public void testErrorAfterApplyDeletes() throws IOException {
+
+    MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
+      boolean sawMaybe = false;
+      boolean failed = false;
+      public MockRAMDirectory.Failure reset() {
+        sawMaybe = false;
+        failed = false;
+        return this;
+      }
+      public void eval(MockRAMDirectory dir) throws IOException {
+        if (sawMaybe && !failed) {
+          // Fail once, on the first write after maybeApplyDeletes:
+          failed = true;
+          throw new IOException("fail after maybeApplyDeletes");
+        }
+        if (!failed) {
+          // Look at the current stack to see whether this write is
+          // happening inside maybeApplyDeletes:
+          StackTraceElement[] trace = new Exception().getStackTrace();
+          for (int i = 0; i < trace.length; i++) {
+            if ("maybeApplyDeletes".equals(trace[i].getMethodName())) {
+              sawMaybe = true;
+              break;
+            }
+          }
+        }
+      }
+    };
+
+    // create a couple of files
+
+    String[] keywords = { "1", "2" };
+    String[] unindexed = { "Netherlands", "Italy" };
+    String[] unstored = { "Amsterdam has lots of bridges",
+                          "Venice has lots of canals" };
+    String[] text = { "Amsterdam", "Venice" };
+
+    for (int pass = 0; pass < 2; pass++) {
+      boolean autoCommit = (0 == pass);
+
+      Directory ramDir = new RAMDirectory();
+      MockRAMDirectory dir = new MockRAMDirectory(ramDir);
+      IndexWriter modifier = new IndexWriter(dir, autoCommit,
+                                             new WhitespaceAnalyzer(), true);
+      modifier.setUseCompoundFile(true);
+      modifier.setMaxBufferedDeleteTerms(2);
+
+      dir.failOn(failure.reset());
+
+      for (int i = 0; i < keywords.length; i++) {
+        Document doc = new Document();
+        doc.add(new Field("id", keywords[i], Field.Store.YES,
+                          Field.Index.UN_TOKENIZED));
+        doc.add(new Field("country", unindexed[i], Field.Store.YES,
+                          Field.Index.NO));
+        doc.add(new Field("contents", unstored[i], Field.Store.NO,
+                          Field.Index.TOKENIZED));
+        doc.add(new Field("city", text[i], Field.Store.YES,
+                          Field.Index.TOKENIZED));
+        modifier.addDocument(doc);
+      }
+
+      // flush (and commit if ac)
+      modifier.optimize();
+
+      // commit if !ac
+      if (!autoCommit) {
+        modifier.close();
+      }
+
+      // one of the two docs hits
+      Term term = new Term("city", "Amsterdam");
+      int hitCount = getHitCount(dir, term);
+      assertEquals(1, hitCount);
+
+      // open the writer again (closed above in the !ac case)
+      if (!autoCommit) {
+        modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+        modifier.setUseCompoundFile(true);
+      }
+
+      // delete the doc
+      // (max buffered delete terms is two, so this is buffered)
+      modifier.deleteDocuments(term);
+
+      // add a doc (needed for the !ac case; see below)
+      // (the doc remains buffered)
+      Document doc = new Document();
+      modifier.addDocument(doc);
+
+      // flush the changes: the buffered delete and the new doc.
+
+      // The failure object will fail on the first write after the del
+      // file gets created when processing the buffered delete.
+
+      // In the ac case, this will be when writing the new segments
+      // file, so we really don't need the new doc, but it's harmless.
+
+      // In the !ac case, a new segments file won't be created, but
+      // creation of the cfs file happens next, so we need the doc (to
+      // test that it's okay that we don't lose deletes if failing
+      // while creating the cfs file).
+
+      boolean failed = false;
+      try {
+        modifier.flush();
+      } catch (IOException ioe) {
+        failed = true;
+      }
+
+      assertTrue(failed);
+
+      // The flush above failed, so we need to retry it (which will
+      // succeed, because the failure is a one-shot).
+      if (!autoCommit) {
+        modifier.close();
+      } else {
+        modifier.flush();
+      }
+
+      hitCount = getHitCount(dir, term);
+
+      // If we haven't lost the delete, the hit count will be zero.
+      assertEquals(0, hitCount);
+
+      if (autoCommit) {
+        modifier.close();
+      }
+
+      dir.close();
+    }
+  }
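The anonymous Failure in this test shows the intended idiom: eval fires at every potential failure point, watches the stack for maybeApplyDeletes, and then makes the next write fail exactly once. A reusable variant might look like the sketch below (hypothetical, not part of the patch; unlike the test's version it fails inside the named method rather than on the write after it):

    // Hypothetical reusable Failure for the test class: throws once, on the
    // first write that happens while the named method is on the call stack.
    public static class FailOnlyInMethod extends MockRAMDirectory.Failure {
      private final String methodName;
      private boolean failed;
      public FailOnlyInMethod(String methodName) {
        this.methodName = methodName;
      }
      public MockRAMDirectory.Failure reset() {
        failed = false;
        return this;
      }
      public void eval(MockRAMDirectory dir) throws IOException {
        if (failed)
          return;
        StackTraceElement[] trace = new Exception().getStackTrace();
        for (int i = 0; i < trace.length; i++) {
          if (methodName.equals(trace[i].getMethodName())) {
            failed = true;
            throw new IOException("simulated failure in " + methodName);
          }
        }
      }
    }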
+
+  // This test verifies that buffered deletes are not lost when
+  // exceptions occur either while creating a transaction or within one.
+  public void testErrorDuringTransaction() throws IOException {
+
+    MockRAMDirectory.Failure failure = new MockRAMDirectory.Failure() {
+      boolean sawMaybe = false;
+      boolean failed = false;
+      public MockRAMDirectory.Failure reset() {
+        sawMaybe = false;
+        failed = false;
+        return this;
+      }
+      public void eval(MockRAMDirectory dir) throws IOException {
+        if (sawMaybe && !failed) {
+          // Fail once, on the first write after maybeApplyDeletes:
+          failed = true;
+          throw new IOException("fail after maybeApplyDeletes");
+        }
+        if (!failed) {
+          // Look at the current stack to see whether this write is
+          // happening inside maybeApplyDeletes:
+          StackTraceElement[] trace = new Exception().getStackTrace();
+          for (int i = 0; i < trace.length; i++) {
+            if ("maybeApplyDeletes".equals(trace[i].getMethodName())) {
+              sawMaybe = true;
+              break;
+            }
+          }
+        }
+      }
+    };
+
+    // add a couple of files
+
+    String[] keywords = { "1", "2" };
+    String[] unindexed = { "Netherlands", "Italy" };
+    String[] unstored = { "Amsterdam has lots of bridges",
+                          "Venice has lots of canals" };
+    String[] text = { "Amsterdam", "Venice" };
+
+    for (int pass = 0; pass < 2; pass++) {
+      boolean autoCommit = (0 == pass);
+
+      Directory ramDir = new RAMDirectory();
+      MockRAMDirectory dir = new MockRAMDirectory(ramDir);
+      IndexWriter modifier = new IndexWriter(dir, autoCommit,
+                                             new WhitespaceAnalyzer(), true);
+      modifier.setUseCompoundFile(true);
+      modifier.setMaxBufferedDeleteTerms(2);
+
+      dir.failOn(failure.reset());
+
+      // First we add a couple of documents
+      // (below maxBufferedDocs, so they are not flushed)
+      for (int i = 0; i < keywords.length; i++) {
+        Document doc = new Document();
+        doc.add(new Field("id", keywords[i], Field.Store.YES,
+                          Field.Index.UN_TOKENIZED));
+        doc.add(new Field("country", unindexed[i], Field.Store.YES,
+                          Field.Index.NO));
+        doc.add(new Field("contents", unstored[i], Field.Store.NO,
+                          Field.Index.TOKENIZED));
+        doc.add(new Field("city", text[i], Field.Store.YES,
+                          Field.Index.TOKENIZED));
+        modifier.addDocument(doc);
+      }
+
+      // this will cause a flush (and a commit, if ac)
+      modifier.optimize();
+
+      // cause the commit in the !ac case
+      if (!autoCommit) {
+        modifier.close();
+      }
+
+      // We should see a hit for one of the two docs added above
+      Term term = new Term("city", "Amsterdam");
+      int hitCount = getHitCount(dir, term);
+      assertEquals(1, hitCount);
+
+      if (!autoCommit) {
+        modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer());
+        modifier.setUseCompoundFile(true);
+      }
+
+      // Now buffer a delete for one of these docs
+      modifier.deleteDocuments(term);
+
+      // We'll now try to start a transaction.
+      // !ac: no problem; nothing gets written.
+      // ac: tries to flush, which triggers the failure, so we retry the
+      // start (which will succeed the second time, because the failure
+      // is a one-shot).
+      try {
+        modifier.startTransaction();
+        assert !autoCommit;
+      } catch (IOException ioe) {
+        assert autoCommit;
+        modifier.startTransaction();
+      }
+
+      // Now we add the docs again
+      // (again, in both cases (ac, !ac) they're buffered)
+      for (int i = 0; i < keywords.length; i++) {
+        Document doc = new Document();
+        doc.add(new Field("id", keywords[i], Field.Store.YES,
+                          Field.Index.UN_TOKENIZED));
+        doc.add(new Field("country", unindexed[i], Field.Store.YES,
+                          Field.Index.NO));
+        doc.add(new Field("contents", unstored[i], Field.Store.NO,
+                          Field.Index.TOKENIZED));
+        doc.add(new Field("city", text[i], Field.Store.YES,
+                          Field.Index.TOKENIZED));
+        modifier.addDocument(doc);
+      }
+
+      // Now we try to flush the docs to disk.
+      // In the ac case, this will work fine, because the flush already
+      // hit the exception and we restarted the transaction, so the
+      // buffered docs have already been incorporated anyway; we then
+      // roll back manually (rather than as part of the exception
+      // handling).
+      // In the !ac case, this will trigger the failure and we'll roll
+      // back in the catch clause.
+      try {
+        modifier.flush();
+        assert autoCommit;
+        modifier.rollbackTransaction();
+      } catch (IOException ioe) {
+        modifier.rollbackTransaction();
+      }
+
+      if (!autoCommit) {
+        modifier.close();
+      }
+
+      hitCount = getHitCount(dir, term);
+
+      // The final result must have a hit count of zero: we added two
+      // docs that hit; the first is gone because of the delete (which
+      // had better not have gotten lost), and the second because we
+      // rolled back the transaction that contained the add.
+      assertEquals(0, hitCount);
+
+      if (autoCommit) {
+        modifier.close();
+      }
+      dir.close();
+    }
+  }
+
   private String arrayToString(String[] l) {
     String s = "";
     for (int i = 0; i < l.length; i++) {
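Both tests also call the pre-existing getHitCount helper, which is unchanged and therefore not shown in the diff. Its body is presumably equivalent to this sketch (an assumption, reconstructed from how the tests call it):

    // Hypothetical reconstruction of the existing helper: open a searcher,
    // count the hits for a single term query, and close the searcher again.
    private int getHitCount(Directory dir, Term term) throws IOException {
      IndexSearcher searcher = new IndexSearcher(dir);
      int hitCount = searcher.search(new TermQuery(term)).length();
      searcher.close();
      return hitCount;
    }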
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
--- src/java/org/apache/lucene/index/IndexWriter.java	(revision 549637)
+++ src/java/org/apache/lucene/index/IndexWriter.java	(working copy)
@@ -221,6 +221,8 @@
 
   private SegmentInfos localRollbackSegmentInfos;   // segmentInfos we will fallback to if the commit fails
   private boolean localAutoCommit;                  // saved autoCommit during local transaction
+  private int localNumBufferedDeleteTerms;          // saved numBufferedDeleteTerms during local transaction
+  private HashMap localBufferedDeleteTerms;         // saved bufferedDeleteTerms during local transaction
 
   private boolean autoCommit = true;                // false if we should commit only on close
 
   SegmentInfos segmentInfos = new SegmentInfos();   // the segments
@@ -1241,9 +1243,11 @@
    * commitTransaction() or rollbackTransaction() to finish
    * the transaction.
    */
-  private void startTransaction() throws IOException {
+  void startTransaction() throws IOException {
     localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone();
     localAutoCommit = autoCommit;
+    localNumBufferedDeleteTerms = numBufferedDeleteTerms;
+    localBufferedDeleteTerms = bufferedDeleteTerms;
     if (localAutoCommit) {
       flushRamSegments();
       // Turn off auto-commit during our local transaction:
@@ -1258,7 +1262,7 @@
    * Rolls back the transaction and restores state to where
    * we were at the start.
    */
-  private void rollbackTransaction() throws IOException {
+  void rollbackTransaction() throws IOException {
 
     // First restore autoCommit in case we hit an exception below:
     autoCommit = localAutoCommit;
@@ -1271,6 +1275,10 @@
     segmentInfos.addAll(localRollbackSegmentInfos);
     localRollbackSegmentInfos = null;
 
+    numBufferedDeleteTerms = localNumBufferedDeleteTerms;
+    bufferedDeleteTerms = localBufferedDeleteTerms;
+    localBufferedDeleteTerms = null;
+
     // Ask deleter to locate unreferenced files we had
     // created & remove them:
     deleter.checkpoint(segmentInfos, false);
@@ -1287,7 +1295,7 @@
    * segments file and remove and pending deletions we have
    * accumulated during the transaction
    */
-  private void commitTransaction() throws IOException {
+  void commitTransaction() throws IOException {
 
     // First restore autoCommit in case we hit an exception below:
     autoCommit = localAutoCommit;
@@ -1341,6 +1349,9 @@
       deleter.refresh();
 
     ramSegmentInfos = new SegmentInfos();
+
+    // Okay to clear these; they are not rollbackable anyway
+    // (see the related comments at the rollback points):
     bufferedDeleteTerms.clear();
     numBufferedDeleteTerms = 0;
 
@@ -1875,6 +1886,9 @@
     SegmentInfos rollback = null;
     boolean success = false;
 
+    HashMap saveBufferedDeleteTerms = null;
+    int saveNumBufferedDeleteTerms = 0;
+
     // This is try/finally to rollback our internal state
     // if we hit exception when doing the merge:
     try {
@@ -1908,6 +1922,10 @@
       }
 
       if (sourceSegments == ramSegmentInfos) {
+
+        saveBufferedDeleteTerms = bufferedDeleteTerms;
+        saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
+
         maybeApplyDeletes(doMerge);
         doAfterFlush();
       }
@@ -1944,6 +1962,10 @@
         // same segments_N file -- write once):
         segmentInfos.clear();
         segmentInfos.addAll(rollback);
+        if (saveBufferedDeleteTerms != null) {
+          numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
+          bufferedDeleteTerms = saveBufferedDeleteTerms;
+        }
       }
 
       // Delete any partially created and now unreferenced files:
@@ -2042,7 +2064,13 @@
     }
 
     // Clean up bufferedDeleteTerms.
-    bufferedDeleteTerms.clear();
+
+    // Rollbacks of buffered deletes are based on restoring the old
+    // map, so don't modify this one. This is rare enough that the GC
+    // overhead is almost certainly lower than the alternative, which
+    // would be to clone the map to support rollback.
+    bufferedDeleteTerms = new HashMap();
     numBufferedDeleteTerms = 0;
   }
 }
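The IndexWriter changes above all serve one idiom: snapshot bufferedDeleteTerms by reference, and on a successful flush replace the map with a fresh HashMap instead of calling clear(), so that the snapshot stays valid for rollback. Reduced to a minimal, self-contained sketch (illustrative only; only the two field names come from the patch, the rest is simplified scaffolding):

    import java.io.IOException;
    import java.util.HashMap;

    // Minimal sketch of the rollback idiom behind the IndexWriter changes.
    class BufferedDeletesRollbackSketch {
      HashMap bufferedDeleteTerms = new HashMap();
      int numBufferedDeleteTerms = 0;

      void applyDeletes() throws IOException {
        // stand-in for the real work, which may throw part-way through
      }

      void flushDeletes() throws IOException {
        HashMap saveBufferedDeleteTerms = bufferedDeleteTerms;  // snapshot by reference
        int saveNumBufferedDeleteTerms = numBufferedDeleteTerms;
        try {
          applyDeletes();
          bufferedDeleteTerms = new HashMap();  // replace, never clear(): the
          numBufferedDeleteTerms = 0;           // saved reference must survive
        } catch (IOException ioe) {
          bufferedDeleteTerms = saveBufferedDeleteTerms;  // restore the snapshot
          numBufferedDeleteTerms = saveNumBufferedDeleteTerms;
          throw ioe;
        }
      }
    }

Replacing the map rather than clearing it trades a little garbage for a cheap, always-valid rollback point, which is exactly the trade-off the comment in the last hunk argues for.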