diff -r 2349e7ad1056 lucene/CHANGES.txt --- a/lucene/CHANGES.txt Tue Sep 21 05:04:19 2010 +0000 +++ b/lucene/CHANGES.txt Tue Sep 21 12:21:08 2010 -0400 @@ -529,6 +529,10 @@ * LUCENE-2650: Added extra safety to MMapIndexInput clones to prevent accessing an unmapped buffer if the input is closed (Mike McCandless, Uwe Schindler, Robert Muir) +* LUCENE-2658: Exceptions while processing term vectors enabled for multiple + fields could lead to invalid ArrayIndexOutOfBoundsExceptions. + (Robert Muir, Mike McCandless) + New features * LUCENE-2128: Parallelized fetching document frequencies during weight diff -r 2349e7ad1056 lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java --- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Tue Sep 21 05:04:19 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Tue Sep 21 12:21:08 2010 -0400 @@ -76,14 +76,16 @@ assert perThread.doc.numVectorFields == 0; assert 0 == perThread.doc.perDocTvf.length(); assert 0 == perThread.doc.perDocTvf.getFilePointer(); - } else { - assert perThread.doc.docID == docState.docID; + } - if (termsHashPerField.numPostings != 0) - // Only necessary if previous doc hit a - // non-aborting exception while writing vectors in - // this field: - termsHashPerField.reset(); + assert perThread.doc.docID == docState.docID; + + if (termsHashPerField.numPostings != 0) { + // Only necessary if previous doc hit a + // non-aborting exception while writing vectors in + // this field: + termsHashPerField.reset(); + perThread.termsHashPerThread.reset(false); } } @@ -98,7 +100,7 @@ /** Called once per field per document if term vectors * are enabled, to write the vectors to * RAMOutputStream, which is then quickly flushed to - * * the real term vectors files in the Directory. */ + * the real term vectors files in the Directory. 
*/ @Override void finish() throws IOException { @@ -188,6 +190,12 @@ } termsHashPerField.reset(); + + // NOTE: we clear, per-field, at the thread level, + // because term vectors fully write themselves on each + // field; this saves RAM (e.g. if a large doc has two large + // fields with term vectors on) because we recycle/reuse + // all RAM after each field: perThread.termsHashPerThread.reset(false); } diff -r 2349e7ad1056 lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Tue Sep 21 05:04:19 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Tue Sep 21 12:21:08 2010 -0400 @@ -71,7 +71,7 @@ writer.updateDocument(idTerm, doc); } catch (RuntimeException re) { if (VERBOSE) { - System.out.println("EXC: "); + System.out.println(Thread.currentThread().getName() + ": EXC: "); re.printStackTrace(System.out); } try {