Index: lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java	(revision 1513163)
+++ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighterRanking.java	(working copy)
@@ -173,6 +173,8 @@
       assertTrue(p.getNumMatches() > 0);
       assertTrue(p.getStartOffset() >= 0);
       assertTrue(p.getStartOffset() <= content.length());
+      assertTrue(p.getEndOffset() >= p.getStartOffset());
+      assertTrue(p.getEndOffset() <= content.length());
       // we use a very simple analyzer. so we can assert the matches are correct
       int lastMatchStart = -1;
       for (int i = 0; i < p.getNumMatches(); i++) {
Index: lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java	(revision 1513163)
+++ lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java	(working copy)
@@ -23,6 +23,9 @@
 import java.text.BreakIterator;
 import java.util.Map;
 
+// nocommit
+import java.util.Arrays;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -87,6 +90,49 @@
     dir.close();
   }
 
+  public void testFormatWithMatchExceedingContentLength() throws Exception {
+
+    int maxLength = 17;
+    String bodyText = "123 5678 01234 TEST";
+
+    final Analyzer analyzer = new MockAnalyzer(random());
+
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
+    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final Field body = new Field("body", bodyText, fieldType);
+
+    Document doc = new Document();
+    doc.add(body);
+
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+
+    Query query = new TermQuery(new Term("body", "test"));
+
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+
+
+    assertEquals(1, snippets.length);
+    // LUCENE-5166: no snippet
+    assertEquals("123 5678 01234 TE", snippets[0]);
+
+    ir.close();
+    dir.close();
+  }
+
   // simple test with one sentence documents.
   public void testOneSentence() throws Exception {
     Directory dir = newDirectory();
@@ -966,4 +1012,44 @@
     ir.close();
     dir.close();
   }
+
+  // LUCENE-5166
+  public void testTruncateLastPassage() throws Exception {
+    int maxLength = 27;
+    String bodyText = "abcd foobar. bah. abcd goobar.";
+
+    final Analyzer analyzer = new MockAnalyzer(random());
+
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+
+    final FieldType fieldType = new FieldType(TextField.TYPE_STORED);
+    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    final Field body = new Field("body", bodyText, fieldType);
+
+    Document doc = new Document();
+    doc.add(body);
+
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+
+    Query query = new TermQuery(new Term("body", "abcd"));
+
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+
+    PostingsHighlighter highlighter = new PostingsHighlighter(maxLength);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+    assertEquals(1, snippets.length);
+    // abcd goobar passage was truncated so should not be included:
+    assertEquals("abcd foobar", snippets[0]);
+    ir.close();
+    dir.close();
+  }
 }
Index: lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java	(revision 1513163)
+++ lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java	(working copy)
@@ -506,6 +506,13 @@
           throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
         }
         int end = dp.endOffset();
+        // LUCENE-5166: this hit would span the content limit... however more valid
+        // hits may exist (they are sorted by start). so we pretend like we never
+        // saw this term, it won't cause a passage to be added to passageQueue or anything.
+        assert EMPTY.startOffset() == Integer.MAX_VALUE;
+        if (start < contentLength && end >= contentLength) {
+          continue;
+        }
+        if (start >= current.endOffset) {
+          if (current.startOffset >= 0) {
+            // finalize current
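
Note for reviewers: the heart of the change is the new guard in PostingsHighlighter. A term hit that starts inside the truncated content but ends past it is skipped entirely, instead of producing a Passage whose endOffset exceeds the content handed to the PassageFormatter (the cause of the StringIndexOutOfBoundsException reported in LUCENE-5166). The sketch below walks that boundary condition in isolation; the class name and hard-coded offsets are illustrative only and are not part of the patch. With maxLength = 17 the stored body "123 5678 01234 TEST" is truncated after "TE", so the hit for "test" spans [15,19) and straddles the limit:

    // Hypothetical standalone sketch of the LUCENE-5166 boundary check.
    public class BoundaryCheckSketch {
      public static void main(String[] args) {
        int contentLength = 17; // analogous to the highlighter's maxLength
        // {startOffset, endOffset} pairs for "123", "5678", "01234", "TEST"
        int[][] hits = { {0, 3}, {4, 8}, {9, 14}, {15, 19} };
        for (int[] hit : hits) {
          int start = hit[0], end = hit[1];
          // the added guard: pretend we never saw a hit spanning the limit
          if (start < contentLength && end >= contentLength) {
            System.out.println("skip [" + start + "," + end + ")");
            continue;
          }
          System.out.println("keep [" + start + "," + end + ")");
        }
      }
    }

Skipping the hit, rather than clamping end to contentLength, keeps the reported match offsets honest: the new assertions in TestPostingsHighlighterRanking can then require getEndOffset() <= content.length() for every passage without special-casing truncation.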