Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 828797) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) @@ -95,13 +95,33 @@ } } } else if (query instanceof PhraseQuery) { - Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms(); + PhraseQuery phraseQuery = ((PhraseQuery) query); + Term[] phraseQueryTerms = phraseQuery.getTerms(); SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } + int slop = phraseQuery.getSlop(); + int[] positions = phraseQuery.getPositions(); + if (positions.length > 0) { + + int lastPos = positions[0]; + int largestInc = 0; + int sz = positions.length; + for (int i = 1; i < sz; i++) { + int pos = positions[i]; + int inc = pos - lastPos; + if (inc > largestInc) { + largestInc = inc; + } - int slop = ((PhraseQuery) query).getSlop(); + lastPos = pos; + } + if(largestInc > 1) { + slop += largestInc; + } + } + boolean inorder = false; if (slop == 0) { Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 828797) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -84,7 +84,7 @@ */ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter { // TODO: change to CURRENT, does not work because posIncr: - static final Version TEST_VERSION = Version.LUCENE_24; + static final Version TEST_VERSION = Version.LUCENE_29; private IndexReader reader; static final String FIELD_NAME = "contents"; @@ -100,7 +100,7 @@ "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", "JFK has been shot", "John Kennedy has been shot", "This text has a typo in referring to Keneddy", - "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b" }; + "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" }; /** * Constructor for HighlightExtractorTest. @@ -256,6 +256,51 @@ assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 3); + + numHighlights = 0; + doSearching("\"This piece of text refers to Kennedy\""); + + maxNumFragmentsRequired = 2; + + scorer = new QueryScorer(query, FIELD_NAME); + highlighter = new Highlighter(this, scorer); + + for (int i = 0; i < hits.totalHits; i++) { + String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + + highlighter.setTextFragmenter(new SimpleFragmenter(40)); + + String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, + "..."); + System.out.println("\t" + result); + } + + assertTrue("Failed to find correct number of highlights " + numHighlights + " found", + numHighlights == 4); + + numHighlights = 0; + doSearching("\"lets is a the lets is a the lets is a the lets\""); + + maxNumFragmentsRequired = 2; + + scorer = new QueryScorer(query, FIELD_NAME); + highlighter = new Highlighter(this, scorer); + + for (int i = 0; i < hits.totalHits; i++) { + String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + + highlighter.setTextFragmenter(new SimpleFragmenter(40)); + + String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, + "..."); + System.out.println("\t" + result); + } + + assertTrue("Failed to find correct number of highlights " + numHighlights + " found", + numHighlights == 4); + } public void testSimpleQueryScorerPhraseHighlighting2() throws Exception { @@ -1531,6 +1576,7 @@ public void doSearching(String queryString) throws Exception { QueryParser parser = new QueryParser(FIELD_NAME, analyzer); + parser.setEnablePositionIncrements(true); parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = parser.parse(queryString); doSearching(query);