Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision 9fcd35df2ce3f9bf1c57f8bba78173d0ca54a152) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (revision ) @@ -34,11 +34,11 @@ public void test1TermIndex() throws Exception { FieldFragList ffl = ffl( "a", "a" ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "a", sfb.createFragment( reader, 0, F, ffl ) ); // change tags sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]" } ); - assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) ); } public void test2Frags() throws Exception { @@ -48,7 +48,7 @@ // 3 snippets requested, but should be 2 assertEquals( 2, f.length ); assertEquals( "a b b b b b b b b b b", f[0] ); - assertEquals( "b b a b a b ", f[1] ); + assertEquals( "b b a b a b", f[1] ); } public void test3Frags() throws Exception { @@ -58,7 +58,7 @@ assertEquals( 3, f.length ); assertEquals( "a b b b b b b b b b b", f[0] ); assertEquals( "b b a b a b b b b b c", f[1] ); - assertEquals( "c a a b b ", f[2] ); + assertEquals( "c a a b b", f[2] ); } public void testTagsAndEncoder() throws Exception { @@ -66,7 +66,7 @@ SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); String[] preTags = { "[" }; String[] postTags = { "]" }; - assertEquals( "<h1> [a] </h1> ", + assertEquals( "<h1> [a] </h1>", sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) ); } @@ -88,7 +88,7 @@ SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "a b c d e ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "a b c d e", sfb.createFragment( reader, 0, F, ffl ) ); } public void test1PhraseLongMV() throws Exception { @@ -113,7 +113,7 @@ SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); - assertEquals( "processing speed, the ", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( "processing speed, the", sfb.createFragment( reader, 0, F, ffl ) ); } public void testUnstoredField() throws Exception { @@ -163,6 +163,6 @@ FieldFragList ffl = sflb.createFieldFragList( fpl, 100 ); SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder(); sfb.setMultiValuedSeparator( '/' ); - assertEquals( " b c//d e/", sfb.createFragment( reader, 0, F, ffl ) ); + assertEquals( " b c//d e", sfb.createFragment( reader, 0, F, ffl ) ); } } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (revision 9fcd35df2ce3f9bf1c57f8bba78173d0ca54a152) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java (revision ) @@ -183,7 +183,12 @@ buffer.append( getMultiValuedSeparator() ); index[0]++; } - int eo = buffer.length() < endOffset ? buffer.length() : boundaryScanner.findEndOffset( buffer, endOffset ); + int bufferLength = buffer.length(); + // we added the multi value char to the last buffer, ignore it + if (values[index[0] - 1].isTokenized()) { + bufferLength--; + } + int eo = bufferLength < endOffset ? bufferLength : boundaryScanner.findEndOffset( buffer, endOffset ); modifiedStartOffset[0] = boundaryScanner.findStartOffset( buffer, startOffset ); return buffer.substring( modifiedStartOffset[0], eo ); } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java (revision 9fcd35df2ce3f9bf1c57f8bba78173d0ca54a152) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java (revision ) @@ -27,7 +27,7 @@ String[] f = sofb.createFragments( reader, 0, F, ffl, 3 ); assertEquals( 3, f.length ); // check score order - assertEquals( "c a a b b ", f[0] ); + assertEquals( "c a a b b", f[0] ); assertEquals( "b b a b a b b b b b c", f[1] ); assertEquals( "a b b b b b b b b b b", f[2] ); }