Index: lucene/CHANGES.txt
===================================================================
--- lucene/CHANGES.txt (revision 1439622)
+++ lucene/CHANGES.txt (working copy)
@@ -82,6 +82,9 @@
* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)
+* LUCENE-4728: Add support for highlighting CommonTermsQuery to all highlighter
+ implementations. (Simon Willnauer)
+
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1439622)
+++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy)
@@ -46,6 +46,7 @@
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
@@ -114,6 +115,39 @@
}
}
+  /**
+   * Runs a {@code CommonTermsQuery} over the shared test index and checks the best
+   * fragment that {@code Highlighter} produces for each of the two expected hits.
+   *
+   * The query is built with {@code Occur.MUST}, {@code Occur.SHOULD} and the value 3
+   * over the terms "this", "long" and "very" in {@code FIELD_NAME}. For every hit the
+   * stored field text is re-read, a token stream is obtained through
+   * {@code TokenSources.getAnyTokenStream}, and a fresh {@code SimpleSpanFragmenter}
+   * is installed before the fragment is requested.
+   *
+   * NOTE(review): the expected strings below contain no highlight markup. If this
+   * patch text went through a tag-stripping step, the upstream assertions may have
+   * wrapped the matched terms in formatter tags -- confirm against the original
+   * commit before relying on these literals.
+   */
+  public void testHighlightingCommonTermsQuery() throws Exception {
+    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
+
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
+    for (String termText : new String[] {"this", "long", "very"}) {
+      query.add(new Term(FIELD_NAME, termText));
+    }
+
+    searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+
+    QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
+    Highlighter highlighter = new Highlighter(scorer);
+
+    // Expected best fragment for hits.scoreDocs[0] and hits.scoreDocs[1], in that order.
+    String[] expectedFragments = {
+      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"
+    };
+
+    for (int rank = 0; rank < expectedFragments.length; rank++) {
+      int docId = hits.scoreDocs[rank].doc;
+      StoredDocument doc = searcher.doc(docId);
+      String storedField = doc.get(FIELD_NAME);
+
+      TokenStream stream = TokenSources.getAnyTokenStream(
+          searcher.getIndexReader(), docId, FIELD_NAME, doc, analyzer);
+      // A new fragmenter is installed for every hit, exactly as the unrolled version did.
+      highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
+
+      String fragment = highlighter.getBestFragment(stream, storedField);
+      assertEquals(expectedFragments[rank], fragment);
+    }
+  }
+
public void testHighlightingWithDefaultField() throws Exception {
String s1 = "I call our world Flatland, not because we call it so,";
@@ -150,7 +184,7 @@
"Query in a named field does not result in highlighting when that field isn't in the query",
s1, highlightField(q, FIELD_NAME, s1));
}
-
+
/**
* This method intended for use with testHighlightingWithDefaultField()
*/
Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
===================================================================
--- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (revision 1439622)
+++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java (working copy)
@@ -18,6 +18,8 @@
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -26,7 +28,13 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -62,4 +70,47 @@
writer.close();
dir.close();
}
+
+  /**
+   * Indexes a small corpus into the field "field" with term vectors (positions and
+   * offsets enabled), runs a {@code CommonTermsQuery} over the terms "text", "long"
+   * and "very", and checks the single best fragment that
+   * {@code FastVectorHighlighter} returns for each of the two expected hits.
+   *
+   * NOTE(review): the expected strings below contain no highlight markup. If this
+   * patch text went through a tag-stripping step, the upstream assertions may have
+   * wrapped the matched terms in highlighter tags -- confirm against the original
+   * commit before relying on these literals.
+   */
+  public void testCommonTermsQueryHighlightTest() throws IOException {
+    Directory dir = newDirectory();
+    MockAnalyzer analyzer =
+        new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+
+    // Stored text field that also records term vectors with positions and offsets.
+    FieldType type = new FieldType(TextField.TYPE_STORED);
+    type.setStoreTermVectors(true);
+    type.setStoreTermVectorPositions(true);
+    type.setStoreTermVectorOffsets(true);
+    type.freeze();
+
+    String[] texts = {
+      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
+      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+      "JFK has been shot",
+      "John Kennedy has been shot",
+      "This text has a typo in referring to Keneddy",
+      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc",
+      "y z x y z a b",
+      "lets is a the lets is a the lets is a the lets"
+    };
+    for (String text : texts) {
+      Document doc = new Document();
+      doc.add(new Field("field", text, type));
+      writer.addDocument(doc);
+    }
+
+    CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
+    for (String termText : new String[] {"text", "long", "very"}) {
+      query.add(new Term("field", termText));
+    }
+
+    FastVectorHighlighter highlighter = new FastVectorHighlighter();
+    IndexReader reader = DirectoryReader.open(writer, true);
+    IndexSearcher searcher = new IndexSearcher(reader);
+    TopDocs hits = searcher.search(query, 10);
+    assertEquals(2, hits.totalHits);
+
+    // Expected best fragment for hits.scoreDocs[0] and hits.scoreDocs[1], in that order.
+    String[] expectedFragments = {
+      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
+      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"
+    };
+    for (int rank = 0; rank < expectedFragments.length; rank++) {
+      // The field query is rebuilt for every hit, matching the unrolled version.
+      FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
+      String[] bestFragments =
+          highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[rank].doc, "field", 1000, 1);
+      assertEquals(expectedFragments[rank], bestFragments[0]);
+    }
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
}
Index: lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 1439622)
+++ lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy)
@@ -34,6 +34,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
@@ -146,6 +147,8 @@
if (q != null) {
extract(q, terms);
}
+ } else if (query instanceof CommonTermsQuery) {
+ extractWeightedTerms(terms, query);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
Index: lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
===================================================================
--- lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (revision 1439622)
+++ lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java (working copy)
@@ -28,6 +28,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
@@ -92,6 +93,8 @@
if( !clause.isProhibited() )
flatten( clause.getQuery(), reader, flatQueries );
}
+ } else if (sourceQuery instanceof CommonTermsQuery) {
+ flatten(sourceQuery.rewrite(reader), reader, flatQueries);
}
else if( sourceQuery instanceof DisjunctionMaxQuery ){
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
Index: lucene/highlighter/build.xml
===================================================================
--- lucene/highlighter/build.xml (revision 1439622)
+++ lucene/highlighter/build.xml (working copy)
@@ -27,6 +27,7 @@
+
Index: dev-tools/maven/lucene/highlighter/pom.xml.template
===================================================================
--- dev-tools/maven/lucene/highlighter/pom.xml.template (revision 1439622)
+++ dev-tools/maven/lucene/highlighter/pom.xml.template (working copy)
@@ -61,6 +61,11 @@
lucene-memory
${project.version}
+
+ ${project.groupId}
+ lucene-queries
+ ${project.version}
+
${module-path}/src/java