Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java	(revision 1165079)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java	(working copy)
@@ -76,9 +76,26 @@
    * @param query a query
    * @return the created {@link FieldQuery} object
    */
-  public FieldQuery getFieldQuery( Query query ){
-    return new FieldQuery( query, phraseHighlight, fieldMatch );
+  public FieldQuery getFieldQuery( Query query ) {
+    try {
+      return new FieldQuery( query, null, phraseHighlight, fieldMatch );
+    } catch (IOException e) {
+      // should never be thrown when reader is null
+      throw new RuntimeException (e);
+    }
   }
+
+  /**
+   * create a {@link FieldQuery} object, using the given reader to expand multi-term queries.
+   * 
+   * @param query a query
+   * @param reader the {@link IndexReader} handed to {@link FieldQuery} to rewrite multi-term queries
+   * @return the created {@link FieldQuery} object
+   * @throws IOException if thrown while the {@link FieldQuery} is being built
+   */
+  public FieldQuery getFieldQuery( Query query, IndexReader reader ) throws IOException {
+    return new FieldQuery( query, reader, phraseHighlight, fieldMatch );
+  }
 
   /**
    * return the best fragment.
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java	(revision 1165079)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java	(working copy)
@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -24,10 +25,12 @@
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
@@ -51,16 +54,19 @@
 
   int termOrPhraseNumber; // used for colored tag support
 
-  FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ){
+  // The maximum number of different matching terms accumulated from any one MultiTermQuery
+  private static final int MAX_MTQ_TERMS = 1024;
+
+  FieldQuery( Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
     this.fieldMatch = fieldMatch;
     Set<Query> flatQueries = new HashSet<Query>();
     flatten( query, flatQueries );
-    saveTerms( flatQueries );
+    saveTerms( flatQueries, reader );
     Collection<Query> expandQueries = expand( flatQueries );
 
     for( Query flatQuery : expandQueries ){
       QueryPhraseMap rootMap = getRootMap( flatQuery );
-      rootMap.add( flatQuery );
+      rootMap.add( flatQuery, reader );
       if( !phraseHighlight && flatQuery instanceof PhraseQuery ){
         PhraseQuery pq = (PhraseQuery)flatQuery;
         if( pq.getTerms().length > 1 ){
@@ -71,6 +77,13 @@
     }
   }
   
+  /** For backwards compatibility you can initialize FieldQuery without
+   * an IndexReader, which is only required to support MultiTermQuery
+   */
+  FieldQuery( Query query, boolean phraseHighlight, boolean fieldMatch ) throws IOException {
+    this (query, null, phraseHighlight, fieldMatch);
+  }
+
   void flatten( Query sourceQuery, Collection<Query> flatQueries ){
     if( sourceQuery instanceof BooleanQuery ){
       BooleanQuery bq = (BooleanQuery)sourceQuery;
@@ -89,6 +102,11 @@
       if( !flatQueries.contains( sourceQuery ) )
         flatQueries.add( sourceQuery );
     }
+    else if (sourceQuery instanceof MultiTermQuery) {
+      MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
+      copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
+      flatQueries.add (copy);
+    }
     else if( sourceQuery instanceof PhraseQuery ){
       if( !flatQueries.contains( sourceQuery ) ){
         PhraseQuery pq = (PhraseQuery)sourceQuery;
@@ -207,6 +225,9 @@
       Term[] terms = pq.getTerms();
       return terms[0].field();
     }
+    else if (query instanceof MultiTermQuery) {
+      return ((MultiTermQuery)query).getField();
+    }
     else
       throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
   }
@@ -233,7 +254,7 @@
    * - fieldMatch==false
    *          termSetMap=Map<null,Set<term>>
    */
-    void saveTerms( Collection<Query> flatQueries ){
+    void saveTerms( Collection<Query> flatQueries, IndexReader reader ) throws IOException{
     for( Query query : flatQueries ){
       Set<String> termSet = getTermSet( query );
       if( query instanceof TermQuery )
@@ -242,6 +263,15 @@
         for( Term term : ((PhraseQuery)query).getTerms() )
           termSet.add( term.text() );
       }
+      else if (query instanceof MultiTermQuery) {
+        // without a reader the query cannot be expanded; it then contributes no terms
+        if (reader != null) {
+          BooleanQuery mtqTerms = (BooleanQuery) query.rewrite(reader);
+          for (BooleanClause clause : mtqTerms.getClauses()) {
+            termSet.add (((TermQuery) clause.getQuery()).getTerm().text());
+          }
+        }
+      }
       else
         throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
     }
@@ -319,7 +349,7 @@
       return map;
     }
     
-    void add( Query query ){
+    void add( Query query, IndexReader reader ) throws IOException {
       if( query instanceof TermQuery ){
         addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
       }
@@ -334,6 +364,16 @@
         }
         qpm.markTerminal( pq.getSlop(), pq.getBoost() );
       }
+      else if (query instanceof MultiTermQuery) {
+        // without a reader the query cannot be expanded; there is nothing to add
+        if (reader != null) {
+          // FIXME - reuse query previously rewritten during FieldQuery ctor
+          BooleanQuery mtqTerms = (BooleanQuery) query.rewrite(reader);
+          for (BooleanClause clause : mtqTerms.getClauses()) {
+            addTerm (((TermQuery) clause.getQuery()).getTerm(), query.getBoost());
+          }
+        }
+      }
       else
         throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
     }
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java	(revision 1165079)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java	(working copy)
@@ -72,6 +72,10 @@
   public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
     this.fieldName = fieldName;
     
+    Set<String> termSet = fieldQuery.getTermSet( fieldName );
+    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
+    if( termSet == null ) return;
+
     TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
     if( tfv == null ) return; // just return to make null snippets
     TermPositionVector tpv = null;
@@ -82,9 +86,6 @@
       return; // just return to make null snippets
     }
     
-    Set<String> termSet = fieldQuery.getTermSet( fieldName );
-    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
-    if( termSet == null ) return;
     final CharsRef spare = new CharsRef();
     for( BytesRef term : tpv.getTerms() ){
       if( !termSet.contains( term.utf8ToChars(spare).toString() ) ) continue;
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html	(revision 1165079)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html	(working copy)
@@ -24,6 +24,7 @@
 <li>fast for large docs</li>
 <li>support N-gram fields</li>
 <li>support phrase-unit highlighting with slops</li>
+<li>support multi-term (includes wildcard, range, regexp, etc.) queries</li>
 <li>need Java 1.5</li>
 <li>highlight fields need to be stored with Positions and Offsets</li>
 <li>take into account query boost to score fragments</li>
  • Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (revision 1165079) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java (working copy) @@ -16,19 +16,23 @@ * limitations under the License. */ +import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; import org.apache.lucene.util.BytesRef; @@ -869,4 +873,36 @@ phraseCandidate.add( new TermInfo( "c", 4, 5, 6 ) ); assertNull( fq.searchPhrase( F, phraseCandidate ) ); } + + public void testHighlightQuery() throws Exception { + makeIndexStrMV(); + defgMultiTermQueryTest(new WildcardQuery(new Term(F, "d*g"))); + } + + public void testPrefixQuery() throws Exception { + makeIndexStrMV(); + defgMultiTermQueryTest(new PrefixQuery(new Term(F, "de"))); + } + + public void testRegexpQuery() throws Exception { + makeIndexStrMV(); + Term term = new Term(F, "d[a-z].g"); + defgMultiTermQueryTest(new RegexpQuery (term)); + } + + public void testRangeQuery() throws Exception { + makeIndexStrMV(); + defgMultiTermQueryTest(new TermRangeQuery 
(F, new BytesRef("d"), new BytesRef("e"), true, true)); + } + + private void defgMultiTermQueryTest(Query query) throws IOException { + FieldQuery fq = new FieldQuery( query, reader, true, true ); + QueryPhraseMap qpm = fq.getFieldTermMap(F, "defg"); + assertNotNull (qpm); + assertNull (fq.getFieldTermMap(F, "dog")); + List phraseCandidate = new ArrayList(); + phraseCandidate.add( new TermInfo( "defg", 0, 12, 0 ) ); + assertNotNull (fq.searchPhrase(F, phraseCandidate)); + } + } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java (revision 1165079) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java (working copy) @@ -16,8 +16,10 @@ * limitations under the License. */ +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.WildcardQuery; public class FieldTermStackTest extends AbstractTestCase { @@ -158,4 +160,17 @@ assertEquals( "ee(90,92,63)", stack.pop().toString() ); assertEquals( "ed(91,93,64)", stack.pop().toString() ); } + + + public void testWildcard() throws Exception { + makeIndexLongMV(); + FieldQuery fq = new FieldQuery( new WildcardQuery (new Term(F, "th*e")), reader, true, true ); + FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); + assertEquals (4, stack.termList.size()); + assertEquals ("the(15,18,2)", stack.pop().toString()); + assertEquals ("these(133,138,20)", stack.pop().toString()); + assertEquals ("the(153,156,23)", stack.pop().toString()); + assertEquals ("the(195,198,31)", stack.pop().toString()); + } + } Index: modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetVectorHighlightTask.java 
=================================================================== --- modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetVectorHighlightTask.java (revision 1165079) +++ modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetVectorHighlightTask.java (working copy) @@ -95,11 +95,12 @@ @Override protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){ highlighter = new FastVectorHighlighter( false, false ); - final FieldQuery fq = highlighter.getFieldQuery( q ); + final Query myq = q; return new BenchmarkHighlighter(){ @Override public int doHighlight(IndexReader reader, int doc, String field, Document document, Analyzer analyzer, String text) throws Exception { + final FieldQuery fq = highlighter.getFieldQuery( myq, reader); String[] fragments = highlighter.getBestFragments(fq, reader, doc, field, fragSize, maxFrags); return fragments != null ? fragments.length : 0; } Index: solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java =================================================================== --- solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (revision 1165079) +++ solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (working copy) @@ -364,7 +364,7 @@ // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool( HighlightParams.FIELD_MATCH, false ) ); fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); - FieldQuery fieldQuery = fvh.getFieldQuery( query ); + FieldQuery fieldQuery = fvh.getFieldQuery( query, searcher.getIndexReader() ); // Highlight each document DocIterator iterator = docs.iterator();