Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1133885) +++ lucene/CHANGES.txt (working copy) @@ -468,6 +468,11 @@ * LUCENE-3102: CachingCollector.replay was failing to call setScorer per-segment (Martijn van Groningen via Mike McCandless) +* LUCENE-3183: Fix rare corner case where seeking to empty term + (field="", term="") with terms index interval 1 could hit + ArrayIndexOutOfBoundsException (selckin, Robert Muir, Mike + McCandless) + New Features * LUCENE-3140: Added experimental FST implementation to Lucene. Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1133885) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -73,6 +73,7 @@ import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec; public class TestIndexWriter extends LuceneTestCase { @@ -1763,4 +1764,19 @@ reader.close(); dir.close(); } + + // LUCENE-3183 + public void testEmptyFieldNameTIIOne() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) + .setCodecProvider(_TestUtil.alwaysCodec(new PreFlexRWCodec())); + iwc.setTermIndexInterval(1); + iwc.setReaderTermsIndexDivisor(1); + IndexWriter writer = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + writer.addDocument(doc); + writer.close(); + dir.close(); + } } Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (revision 1133885) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermEnum.java (working copy) @@ -153,8 +153,12 @@ return true; } - /** Optimized scan, without allocating new terms. - * Return number of invocations to next(). */ + /* Optimized scan, without allocating new terms. + * Return number of invocations to next(). + * + * NOTE: LUCENE-3183: if you pass Term("", "") here then this + * will incorrectly return before positioning the enum, + * and position will be -1; caller must detect this. */ final int scanTo(Term term) throws IOException { scanBuffer.set(term); int count = 0; Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (revision 1133885) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (working copy) @@ -57,6 +57,7 @@ final long termOrd; public TermInfoAndOrd(TermInfo ti, long termOrd) { super(ti); + assert termOrd >= 0; this.termOrd = termOrd; } } @@ -306,7 +307,13 @@ ti = enumerator.termInfo; if (tiOrd == null) { if (useCache) { - termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position)); + // LUCENE-3183: it's possible, if term is Term("", + // ""), for the STE to be incorrectly un-positioned + // after scan-to; work around this by not caching in + // this case: + if (enumerator.position >= 0) { + termsCache.put(new CloneableTerm(term), new TermInfoAndOrd(ti, enumerator.position)); + } } } else { assert sameTermInfo(ti, tiOrd, enumerator);