Index: src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestIndexWriter.java	(revision 605149)
+++ src/test/org/apache/lucene/index/TestIndexWriter.java	(working copy)
@@ -26,6 +26,7 @@
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -1808,7 +1809,7 @@
       if (doFail) {
         StackTraceElement[] trace = new Exception().getStackTrace();
         for (int i = 0; i < trace.length; i++) {
-          if ("appendPostings".equals(trace[i].getMethodName()) && count++ == 30) {
+          if ("org.apache.lucene.index.DocumentsWriter".equals(trace[i].getClassName()) && "appendPostings".equals(trace[i].getMethodName()) && count++ == 30) {
            doFail = false;
            throw new IOException("now failing during flush");
           }
@@ -1845,5 +1846,139 @@
     writer.close();
     IndexReader reader = IndexReader.open(dir);
     assertEquals(198, reader.docFreq(new Term("content", "aa")));
+    reader.close();
   }
+
+  private class CrashingFilter extends TokenFilter {
+    String fieldName;
+    int count;
+
+    public CrashingFilter(String fieldName, TokenStream input) {
+      super(input);
+      this.fieldName = fieldName;
+    }
+
+    public Token next(Token result) throws IOException {
+      if (this.fieldName.equals("crash") && count++ >= 4)
+        throw new IOException("I'm experiencing problems");
+      return input.next(result);
+    }
+  }
+
+  public void testDocumentsWriterExceptions() throws IOException {
+    Analyzer analyzer = new Analyzer() {
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        return new CrashingFilter(fieldName, new WhitespaceTokenizer(reader));
+      }
+    };
+
+    for(int i=0;i<2;i++) {
+      MockRAMDirectory dir = new MockRAMDirectory();
+      IndexWriter writer = new IndexWriter(dir, analyzer);
+      //writer.setInfoStream(System.out);
+      Document doc = new Document();
+      doc.add(new Field("contents", "here are some contents", Field.Store.YES,
+                        Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+      writer.addDocument(doc);
+      writer.addDocument(doc);
+      doc.add(new Field("crash", "this should crash after 4 terms", Field.Store.YES,
+                        Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+      doc.add(new Field("other", "this will not get indexed", Field.Store.YES,
+                        Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+      try {
+        writer.addDocument(doc);
+        fail("did not hit expected exception");
+      } catch (IOException ioe) {
+      }
+
+      if (0 == i) {
+        doc = new Document();
+        doc.add(new Field("contents", "here are some contents", Field.Store.YES,
+                          Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        writer.addDocument(doc);
+        writer.addDocument(doc);
+      }
+      writer.close();
+
+      IndexReader reader = IndexReader.open(dir);
+      int expected = 3+(1-i)*2;
+      assertEquals(expected, reader.docFreq(new Term("contents", "here")));
+      assertEquals(expected, reader.maxDoc());
+      for(int j=0;j
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentMerger.java	(revision 605149)
+++ src/java/org/apache/lucene/index/SegmentMerger.java	(working copy)
+    // number mappings match the "merged"
+    // FieldInfos, then we can do a bulk copy of the
+    // stored fields:
     for (int i = 0; i < readers.size(); i++) {
       IndexReader reader = (IndexReader) readers.elementAt(i);
-      boolean same = reader.getFieldNames(IndexReader.FieldOption.ALL).size() == fieldInfos.size() && reader instanceof SegmentReader;
-      if (same) {
+      if (reader instanceof SegmentReader) {
         SegmentReader segmentReader = (SegmentReader) reader;
-        for (int j = 0; same && j < fieldInfos.size(); j++)
-          same = fieldInfos.fieldName(j).equals(segmentReader.getFieldInfos().fieldName(j));
-        if (same)
+        boolean same = true;
+        FieldInfos segmentFieldInfos = segmentReader.getFieldInfos();
+        for (int j = 0; same && j < segmentFieldInfos.size(); j++)
+          same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j));
+        if (same) {
           matchingSegmentReaders[i] = segmentReader;
+        }
       }
     }
Index: src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentsWriter.java	(revision 605149)
+++ src/java/org/apache/lucene/index/DocumentsWriter.java	(working copy)
@@ -562,13 +562,14 @@
 
       // If we hit an exception while appending to the
       // stored fields or term vectors files, we have to
-      // abort because it means those files are possibly
-      // inconsistent.
+      // abort all documents since we last flushed because
+      // it means those files are possibly inconsistent.
       abortOnExc = true;
 
       // Append stored fields to the real FieldsWriter:
-      fieldsWriter.flushDocument(fdtLocal);
+      fieldsWriter.flushDocument(numStoredFields, fdtLocal);
       fdtLocal.reset();
+      numStoredFields = 0;
 
       // Append term vectors to the real outputs:
       if (tvx != null) {
@@ -589,7 +590,6 @@
           tvfLocal.reset();
         }
       }
-      abortOnExc = false;
 
       // Append norms for the fields we saw:
       for(int i=0;i
 0) {
           // Add term vectors for this field
-          writeVectors(fieldInfo);
-          if (postingsVectorsUpto > maxPostingsVectors)
-            maxPostingsVectors = postingsVectorsUpto;
-          postingsVectorsUpto = 0;
-          vectorsPool.reset();
+          boolean success = false;
+          try {
+            writeVectors(fieldInfo);
+            success = true;
+          } finally {
+            if (!success) {
+              // If we hit an exception inside
+              // writeVectors, the contents of tvfLocal
+              // can be corrupt, so we must discard all
+              // term vectors for this document:
+              numVectorFields = 0;
+              tvfLocal.reset();
+            }
+            if (postingsVectorsUpto > maxPostingsVectors)
+              maxPostingsVectors = postingsVectorsUpto;
+            postingsVectorsUpto = 0;
+            vectorsPool.reset();
+          }
         }
       }
     }
@@ -1449,7 +1481,8 @@
        // If we hit an exception below, it's possible the
        // posting list or term vectors data will be
        // partially written and thus inconsistent if
-        // flushed, so we have to abort:
+        // flushed, so we have to abort all documents
+        // since the last flush:
        abortOnExc = true;
 
        if (p != null) {       // term seen since last flush
@@ -2243,12 +2276,12 @@
     boolean success = false;
     int maxTermHit;
     try {
-      // This call is not synchronized and does all the work
       try {
+        // This call is not synchronized and does all the work
        state.processDocument(analyzer);
       } finally {
         maxTermHit = state.maxTermHit;
-        // This call synchronized but fast
+        // This call is synchronized but fast
        finishDocument(state);
      }
      success = true;