Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1463882)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy)
@@ -29,7 +29,9 @@
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -71,6 +73,71 @@
     dir.close();
   }
 
+  public void testPhraseHighlightTest() throws IOException { // best-fragment selection for one sloppy phrase query and two all-MUST boolean queries
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    Document doc = new Document();
+    FieldType type = new FieldType(TextField.TYPE_STORED); // both fields below share this type
+    type.setStoreTermVectorOffsets(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectors(true); // term vectors with positions and offsets are stored for the highlighter to read
+    type.freeze();
+    Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type); // same text as no_long_term plus one 40-character token
+    Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
+
+    doc.add(longTermField);
+    doc.add(noLongTermField);
+    writer.addDocument(doc);
+    FastVectorHighlighter highlighter = new FastVectorHighlighter(); // NOTE(review): default-constructed highlighter, yet the expected strings below contain no highlight tags - confirm markup was not lost from the literals
+    IndexReader reader = DirectoryReader.open(writer, true); // reader is opened directly from the writer
+    int docId = 0; // the only document added above
+    String field = "no_long_term";
+    {
+      PhraseQuery query = new PhraseQuery();
+      query.add(new Term(field, "test"));
+      query.add(new Term(field, "foo"));
+      query.add(new Term(field, "highlighed")); // "highlighed" (sic) matches the spelling used in the indexed text
+      query.setSlop(3); // the three terms are not adjacent in the text ("where" and "is" intervene)
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1); // fragCharSize = 18, maxNumFragments = 1
+      // the whole sloppy-phrase match (28 chars) is expected in one fragment although it exceeds the 18-char size
+      assertEquals(1, bestFragments.length);
+      assertEquals("field: " + field,
+          "test where foo is highlighed", bestFragments[0]);
+    }
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // highlighted results are centered: the expected fragment runs from "foo" to "and" and leaves "test" out
+      assertEquals(1, bestFragments.length);
+      assertEquals("foo is highlighed and", bestFragments[0]);
+    }
+    field = "long_term"; // switch to the field that contains the 40-character token
+    {
+      BooleanQuery query = new BooleanQuery();
+      query.add(new TermQuery(new Term(field,
+          "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+      query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+          docId, field, 18, 1);
+      // the expected fragment is exactly the 40-char term, which alone is longer than the 18-char fragment size
+      assertEquals(1, bestFragments.length);
+      assertEquals("thisisaverylongwordandmakessurethisfails",
+          bestFragments[0]);
+    }
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+
   public void testCommonTermsQueryHighlightTest() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
Index: 
lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (revision 1463882) +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (working copy) @@ -42,7 +42,7 @@ SimpleFragListBuilder sflb = new SimpleFragListBuilder(); FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize ); assertEquals( 1, ffl.getFragInfos().size() ); - assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() ); + assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() ); } public void testSmallerFragSizeThanPhraseQuery() throws Exception { @@ -55,7 +55,7 @@ FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), sflb.minFragCharSize ); assertEquals( 1, ffl.getFragInfos().size() ); if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() ); - assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() ); + assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() ); } public void test1TermIndex() throws Exception { Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java =================================================================== --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (revision 1463882) +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (working copy) @@ -77,29 +77,27 @@ en = phraseInfo.getEndOffset(); int lastEndOffset = phraseInfo.getEndOffset(); - while( 
true ){ - if( ite.hasNext() ){ - phraseInfo = ite.next(); - taken = true; - if( phraseInfo == null ) break; - } - else + while(ite.hasNext()){ + phraseInfo = ite.next(); + taken = true; + if( phraseInfo == null ) { break; - if( phraseInfo.getEndOffset() <= en ){ - wpil.add( phraseInfo ); - lastEndOffset = phraseInfo.getEndOffset(); } - else + if (phraseInfo.getEndOffset() <= en) { + wpil.add(phraseInfo); + lastEndOffset = phraseInfo.getEndOffset(); + } else { break; + } } int matchLen = lastEndOffset - firstOffset; //now recalculate the start and end position to "center" the result - int newMargin = (fragCharSize-matchLen)/2; + int newMargin = Math.max(0, (fragCharSize-matchLen)/2); st = firstOffset - newMargin; if(st