Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1197170) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -79,6 +79,36 @@ "This text has a typo in referring to Keneddy", "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" }; + public void testForIssue2587() throws Exception { + TestHighlightRunner helper = new TestHighlightRunner() { + + @Override + public void run() throws Exception { + TermQuery query = new TermQuery(new Term("data", "g")); + Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query)); + + hg.setTextFragmenter(new Fragmenter() { + private CharTermAttribute termAtt; + + public void start(String originalText, TokenStream tokenStream) { + termAtt = tokenStream.addAttribute(CharTermAttribute.class); + } + + public boolean isNewFragment() { + return (termAtt.toString().equals("f") || termAtt.toString().equals("k")); + } + }); + + String match = hg.getBestFragment(analyzer, "data", "A b c d e... F g h i j! K l m n o. 
"); + + assertEquals("F g h i j", match); + + } + }; + + helper.start(); + } + public void testQueryScorerHits() throws Exception { Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true); Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 1197170) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy) @@ -254,7 +254,8 @@ currentFrag.setScore(fragmentScorer.getFragmentScore()); //record stats for a new fragment currentFrag.textEndPos = newText.length(); - currentFrag =new TextFragment(newText, newText.length(), docFrags.size()); + // XXX FIX FOR LUCENE-2587 + currentFrag = new TextFragment(newText, newText.length() + offsetAtt.startOffset() - endOffset, docFrags.size()); fragmentScorer.startFragment(currentFrag); docFrags.add(currentFrag); } Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1197170) +++ lucene/CHANGES.txt (working copy) @@ -422,6 +422,10 @@ data in a single text file for transparency (at the expense of poor performance). (Sahin Buyrukbilen via Mike McCandless) +* LUCENE-2587: Compute the right offset in the case of trailing whitespace, + and add a test case testForIssue2587 demonstrating the issue. Before the fix + this test case would fail, returning ". F g h i j" as the hit line. (Roberto Minelli) + * LUCENE-2589: Add a VariableSizedIntIndexInput, which, when used w/ Sep*, makes it simple to take any variable sized int block coders (like Simple9/16) and use them in a codec. (Mike McCandless)