Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1197170)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy)
@@ -79,6 +79,36 @@
"This text has a typo in referring to Keneddy",
"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
+ public void testForIssue2587() throws Exception {
+ TestHighlightRunner helper = new TestHighlightRunner() {
+ // Regression test for LUCENE-2587: checks the text of the best fragment chosen for the term "g".
+ @Override
+ public void run() throws Exception {
+ TermQuery query = new TermQuery(new Term("data", "g"));
+ Highlighter hg = new Highlighter(new SimpleHTMLFormatter(), new QueryTermScorer(query));
+ // Install a custom fragmenter that decides fragment boundaries from the current term.
+ hg.setTextFragmenter(new Fragmenter() {
+ private CharTermAttribute termAtt;
+ // Keep a handle on the stream's term attribute so isNewFragment() can inspect the current token.
+ public void start(String originalText, TokenStream tokenStream) {
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+ }
+ // A new fragment starts at the terms "f" and "k" (presumably the analyzer lowercases "F"/"K" - confirm).
+ public boolean isNewFragment() {
+ return (termAtt.toString().equals("f") || termAtt.toString().equals("k"));
+ }
+ });
+ // In the sample text the queried term "g" appears only between the "F" and "K" boundaries.
+ String match = hg.getBestFragment(analyzer, "data", "A b c d e... F g h i j! K l m n o. ");
+ // The fragment must start at "F", without the "... " that precedes it in the input.
+ assertEquals("F g h i j", match);
+ // NOTE(review): the expected string above has no markup around "g" - confirm against SimpleHTMLFormatter's output.
+ }
+ };
+ // start() is defined outside this patch; presumably it ends up invoking run() above - confirm.
+ helper.start();
+ }
+
public void testQueryScorerHits() throws Exception {
Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true);
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 1197170)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy)
@@ -254,7 +254,8 @@
currentFrag.setScore(fragmentScorer.getFragmentScore());
//record stats for a new fragment
currentFrag.textEndPos = newText.length();
- currentFrag =new TextFragment(newText, newText.length(), docFrags.size());
+ // LUCENE-2587: shift the new fragment's start by the gap between endOffset and the current token's start offset
+ currentFrag = new TextFragment(newText, newText.length() + offsetAtt.startOffset() - endOffset, docFrags.size());
fragmentScorer.startFragment(currentFrag);
docFrags.add(currentFrag);
}
Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1197170)
+++ lucene/CHANGES.txt (working copy)
@@ -422,6 +422,10 @@
data in a single text file for transparency (at the expense of poor
performance). (Sahin Buyrukbilen via Mike McCandless)
+* LUCENE-2587: Highlighter now computes the right fragment offset in the case
+ of trailing whitespace. Added a test case, testForIssue2587, demonstrating the issue;
+ before the fix it would fail, returning ". F g h i j" as the hit line. (Roberto Minelli)
+
* LUCENE-2589: Add a VariableSizedIntIndexInput, which, when used w/
Sep*, makes it simple to take any variable sized int block coders
(like Simple9/16) and use them in a codec. (Mike McCandless)