Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1441376) +++ lucene/CHANGES.txt (working copy) @@ -87,6 +87,9 @@ * LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer creation via the resulting factories using NewAnalyzerTask. (Steve Rowe) +* LUCENE-4728: Add support for highlighting CommonTermsQuery to all highlighter + implementations. (Simon Willnauer) + API Changes * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera) Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1441376) +++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -46,6 +46,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner; @@ -114,6 +115,39 @@ } } + public void testHighlightingCommonTermsQuery() throws Exception { /* Checks Highlighter + QueryScorer against a CommonTermsQuery built from the terms "this", "long" and "very": the search must return exactly two hits, and the best fragment of each of the two top hits is compared with a fixed expected string. */ + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3); + query.add(new Term(FIELD_NAME, "this")); + query.add(new Term(FIELD_NAME, "long")); + query.add(new Term(FIELD_NAME, "very")); + + searcher = new IndexSearcher(reader); + TopDocs hits = searcher.search(query, 10); + assertEquals(2, hits.totalHits); + QueryScorer scorer = new QueryScorer(query, FIELD_NAME); + Highlighter highlighter = new Highlighter(scorer); + + StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc); /* first hit */ + String storedField =
doc.get(FIELD_NAME); + + TokenStream stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer); + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); + highlighter.setTextFragmenter(fragmenter); + String fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", fragment); + + doc = searcher.doc(hits.scoreDocs[1].doc); /* second hit: same steps, with a newly created SimpleSpanFragmenter */ + storedField = doc.get(FIELD_NAME); + + stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer); + highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); + fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very", fragment); + } + public void testHighlightingWithDefaultField() throws Exception { String s1 = "I call our world Flatland, not because we call it so,"; @@ -150,7 +184,7 @@ "Query in a named field does not result in highlighting when that field isn't in the query", s1, highlightField(q, FIELD_NAME, s1)); } - + /** * This method intended for use with testHighlightingWithDefaultField() */ Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1441376) +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy) @@ -18,6 +18,8 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; import
org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -26,7 +28,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -62,4 +70,47 @@ writer.close(); dir.close(); } + + public void testCommonTermsQueryHighlightTest() throws IOException { /* Indexes the sample texts below into "field" with term vectors, positions and offsets stored, runs a CommonTermsQuery on "text", "long" and "very", expects exactly two hits, and compares the first fragment FastVectorHighlighter returns for each of them with a fixed expected string. */ + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); + FieldType type = new FieldType(TextField.TYPE_STORED); + type.setStoreTermVectorOffsets(true); + type.setStoreTermVectorPositions(true); + type.setStoreTermVectors(true); + type.freeze(); + String[] texts = { + "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", + "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", + "JFK has been shot", "John Kennedy has been shot", + "This text has a typo in referring to Keneddy", + "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" }; + for (int i = 0; i < texts.length; i++) { /* one document per sample text */ + Document doc = new Document(); + Field field = new Field("field", texts[i], type); + doc.add(field); + writer.addDocument(doc); +
} + CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2); + query.add(new Term("field", "text")); + query.add(new Term("field", "long")); + query.add(new Term("field", "very")); + + FastVectorHighlighter highlighter = new FastVectorHighlighter(); + IndexReader reader = DirectoryReader.open(writer, true); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs hits = searcher.search(query, 10); + assertEquals(2, hits.totalHits); + FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader); + String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1); + assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", bestFragments[0]); + + fieldQuery = highlighter.getFieldQuery(query, reader); /* second hit: the FieldQuery is built again from the same query and reader */ + bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1); + assertEquals("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]); + + reader.close(); + writer.close(); + dir.close(); + } } Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (revision 1441376) +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (working copy) @@ -905,4 +905,18 @@ assertNotNull (fq.searchPhrase(F, phraseCandidate)); } + public void testStopRewrite() throws Exception { /* Builds a FieldQuery from a dummy Query whose only override is toString; the test passes when the FieldQuery constructor returns without throwing. NOTE(review): presumably this guards FieldQuery's rewrite handling against endless recursion for queries it does not know - confirm against FieldQuery.flatten. */ + Query q = new Query() { + + @Override + public String toString(String field) { + return "DummyQuery"; + } + + }; + make1d1fIndex( "a" ); + assertNotNull(reader); + new FieldQuery(q, reader, true, true ); + } + } Index:
lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java =================================================================== --- lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 1441376) +++ lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) @@ -34,6 +34,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; import org.apache.lucene.search.spans.SpanFirstQuery; @@ -146,6 +147,8 @@ if (q != null) { extract(q, terms); } + } else if (query instanceof CommonTermsQuery) { /* A CommonTermsQuery is not taken apart like the composite queries in the neighbouring branches: the query as a whole is passed to extractWeightedTerms. */ + extractWeightedTerms(terms, query); } else if (query instanceof DisjunctionMaxQuery) { for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { extract(iterator.next(), terms); Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java =================================================================== --- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (revision 1441376) +++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; @@ -92,8 +93,7 @@ if( !clause.isProhibited() ) flatten( clause.getQuery(), reader, flatQueries ); } - } - else if( sourceQuery instanceof DisjunctionMaxQuery ){ + } else if( sourceQuery instanceof DisjunctionMaxQuery ){ DisjunctionMaxQuery dmq =
(DisjunctionMaxQuery)sourceQuery; for( Query query : dmq ){ flatten( query, reader, flatQueries ); @@ -103,12 +103,6 @@ if( !flatQueries.contains( sourceQuery ) ) flatQueries.add( sourceQuery ); } - else if (sourceQuery instanceof MultiTermQuery && reader != null) { - MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone(); - copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS)); - BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader); - flatten(mtqTerms, reader, flatQueries); - } else if( sourceQuery instanceof PhraseQuery ){ if( !flatQueries.contains( sourceQuery ) ){ PhraseQuery pq = (PhraseQuery)sourceQuery; @@ -118,6 +112,24 @@ flatQueries.add( new TermQuery( pq.getTerms()[0] ) ); } } + } else if (reader != null){ + Query query = sourceQuery; + if (sourceQuery instanceof MultiTermQuery) { + MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone(); + copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS)); + query = copy; + } + Query rewritten = query.rewrite(reader); + if (rewritten != query) { + /* Rewrite one step only and hand the result back to flatten(): the + * recursion re-enters this branch for any further rewrite steps, so every + * intermediate form gets the regular type dispatch (for example, an + * intermediate MultiTermQuery gets the top-terms rewrite configured above). */ + flatten(rewritten, reader, flatQueries); + } + /* A query that rewrites to itself is discarded here; that is what ends + * the recursion started above. */ + } // else discard queries } Index: lucene/highlighter/build.xml =================================================================== --- lucene/highlighter/build.xml (revision 1441376) +++ lucene/highlighter/build.xml (working copy) @@ -27,6 +27,7 @@ + Index: dev-tools/maven/lucene/highlighter/pom.xml.template =================================================================== --- dev-tools/maven/lucene/highlighter/pom.xml.template (revision 1441376) +++ dev-tools/maven/lucene/highlighter/pom.xml.template (working copy) @@ -61,6 +61,11 @@ lucene-memory ${project.version} + + ${project.groupId} + lucene-queries + ${project.version} +
${module-path}/src/java