Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1463882)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy)
@@ -29,7 +29,9 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -71,6 +73,71 @@
dir.close();
}
+ public void testPhraseHighlightTest() throws IOException { // checks the single best fragment for phrase/boolean queries when the requested fragment size (18) is small
+ Directory dir = newDirectory();
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ Document doc = new Document();
+ FieldType type = new FieldType(TextField.TYPE_STORED);
+ type.setStoreTermVectorOffsets(true); // term vectors with offsets and positions are enabled for both fields below
+ type.setStoreTermVectorPositions(true);
+ type.setStoreTermVectors(true);
+ type.freeze();
+ Field longTermField = new Field("long_term", "This is a test thisisaverylongwordandmakessurethisfails where foo is highlighed and should be highlighted", type); // same text as no_long_term plus one 40-char token
+ Field noLongTermField = new Field("no_long_term", "This is a test where foo is highlighed and should be highlighted", type);
+
+ doc.add(longTermField);
+ doc.add(noLongTermField);
+ writer.addDocument(doc);
+ FastVectorHighlighter highlighter = new FastVectorHighlighter();
+ IndexReader reader = DirectoryReader.open(writer, true); // reader is opened from the writer; the writer itself is closed only at the end
+ int docId = 0; // only one document was added above
+ String field = "no_long_term";
+ {
+ PhraseQuery query = new PhraseQuery();
+ query.add(new Term(field, "test"));
+ query.add(new Term(field, "foo"));
+ query.add(new Term(field, "highlighed"));
+ query.setSlop(3); // the indexed text has "where" and "is" between the phrase terms, so slop is needed
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+ docId, field, 18, 1);
+ // the sloppy phrase match spans 28 chars, more than the requested 18: expect exactly the matched span, untruncated
+ assertEquals(1, bestFragments.length);
+ assertEquals("field: " + field,
+ "test where foo is highlighed", bestFragments[0]);
+ }
+ {
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(field, "test")), Occur.MUST);
+ query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+ query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+ docId, field, 18, 1);
+ // same terms as independent MUST clauses: the one fragment requested covers "foo" and "highlighed" but not "test"
+ assertEquals(1, bestFragments.length);
+ assertEquals("foo is highlighed and", bestFragments[0]);
+ }
+ field = "long_term";
+ {
+ BooleanQuery query = new BooleanQuery();
+ query.add(new TermQuery(new Term(field,
+ "thisisaverylongwordandmakessurethisfails")), Occur.MUST);
+ query.add(new TermQuery(new Term(field, "foo")), Occur.MUST);
+ query.add(new TermQuery(new Term(field, "highlighed")), Occur.MUST);
+ FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+ String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader,
+ docId, field, 18, 1);
+ // a single 40-char term is wider than the requested 18 chars: expect the whole term as the fragment
+ assertEquals(1, bestFragments.length);
+ assertEquals("thisisaverylongwordandmakessurethisfails",
+ bestFragments[0]);
+ }
+ reader.close();
+ writer.close();
+ dir.close();
+ }
+
public void testCommonTermsQueryHighlightTest() throws IOException {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (revision 1463882)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java (working copy)
@@ -42,7 +42,7 @@
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl(new TermQuery(new Term(F, "abcdefghijklmnopqrs")), "abcdefghijklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
- assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,18)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(abcdefghijklmnopqrs((0,19)))/1.0(0,19)", ffl.getFragInfos().get( 0 ).toString() );
}
public void testSmallerFragSizeThanPhraseQuery() throws Exception {
@@ -55,7 +55,7 @@
FieldFragList ffl = sflb.createFieldFragList( fpl(phraseQuery, "abcdefgh jklmnopqrs" ), sflb.minFragCharSize );
assertEquals( 1, ffl.getFragInfos().size() );
if (VERBOSE) System.out.println( ffl.getFragInfos().get( 0 ).toString() );
- assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(1,19)", ffl.getFragInfos().get( 0 ).toString() );
+ assertEquals( "subInfos=(abcdefghjklmnopqrs((0,21)))/1.0(0,21)", ffl.getFragInfos().get( 0 ).toString() );
}
public void test1TermIndex() throws Exception {
Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (revision 1463882)
+++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (working copy)
@@ -77,29 +77,27 @@
en = phraseInfo.getEndOffset();
int lastEndOffset = phraseInfo.getEndOffset();
- while( true ){
- if( ite.hasNext() ){
- phraseInfo = ite.next();
- taken = true;
- if( phraseInfo == null ) break;
- }
- else
+ while(ite.hasNext()){
+ phraseInfo = ite.next();
+ taken = true;
+ if( phraseInfo == null ) {
break;
- if( phraseInfo.getEndOffset() <= en ){
- wpil.add( phraseInfo );
- lastEndOffset = phraseInfo.getEndOffset();
}
- else
+ if (phraseInfo.getEndOffset() <= en) {
+ wpil.add(phraseInfo);
+ lastEndOffset = phraseInfo.getEndOffset();
+ } else {
break;
+ }
}
int matchLen = lastEndOffset - firstOffset;
//now recalculate the start and end position to "center" the result
- int newMargin = (fragCharSize-matchLen)/2;
+ int newMargin = Math.max(0, (fragCharSize-matchLen)/2);
st = firstOffset - newMargin;
if(st