Index: solr/CHANGES.txt =================================================================== --- solr/CHANGES.txt (revision 1139974) +++ solr/CHANGES.txt (working copy) @@ -254,6 +254,9 @@ * SOLR-2458: post.jar enhanced to handle JSON, CSV and (janhoy) +* LUCENE-3234: add a new parameter hl.phraseLimit for FastVectorHighlighter speed up. + (Mike Sokolov via koji) + Optimizations ---------------------- Index: solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java =================================================================== --- solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (revision 1139974) +++ solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (working copy) @@ -362,6 +362,7 @@ params.getBool( HighlightParams.USE_PHRASE_HIGHLIGHTER, true ), // FVH cannot process hl.requireFieldMatch parameter per-field basis params.getBool( HighlightParams.FIELD_MATCH, false ) ); + fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, Integer.MAX_VALUE)); FieldQuery fieldQuery = fvh.getFieldQuery( query ); // Highlight each document Index: solr/src/common/org/apache/solr/common/params/HighlightParams.java =================================================================== --- solr/src/common/org/apache/solr/common/params/HighlightParams.java (revision 1139974) +++ solr/src/common/org/apache/solr/common/params/HighlightParams.java (working copy) @@ -45,6 +45,7 @@ public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter"; public static final String TAG_PRE = HIGHLIGHT + ".tag.pre"; public static final String TAG_POST = HIGHLIGHT + ".tag.post"; + public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit"; public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar"; // Formatter Index: lucene/contrib/CHANGES.txt =================================================================== --- lucene/contrib/CHANGES.txt (revision 1139974) +++ lucene/contrib/CHANGES.txt (working copy) @@ -55,8 +55,17 @@ ======================= Lucene 3.x (not yet released) ================ -(No changes) +New Features + * LUCENE-3234: provide a limit on phrase analysis in FastVectorHighlighter for + highlighting speed up. Use FastVectorHighlighter.setPhraseLimit() to set limit + (e.g. 5000). (Mike Sokolov via Koji Sekiguchi) + +API Changes + +Bug Fixes + + ======================= Lucene 3.3.0 ======================= New Features Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (revision 1139974) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java (working copy) @@ -188,4 +188,34 @@ assertEquals( 1, fpl.phraseList.size() ); assertEquals( "sppeeeed(1.0)((88,93))", fpl.phraseList.get( 0 ).toString() ); } + + /* This test shows a big speedup from limiting the number of analyzed phrases in + * this bad case for FieldPhraseList */ + /* But it is not reliable as a unit test since it is timing-dependent + public void testManyRepeatedTerms() throws Exception { + long t = System.currentTimeMillis(); + testManyTermsWithLimit (-1); + long t1 = System.currentTimeMillis(); + testManyTermsWithLimit (1); + long t2 = System.currentTimeMillis(); + assertTrue (t2-t1 * 1000 < t1-t); + } + private void testManyTermsWithLimit (int limit) throws Exception { + StringBuilder buf = new StringBuilder (); + for (int i = 0; i < 16000; i++) { + buf.append("a b c "); + } + make1d1fIndex( buf.toString()); + + Query query = tq("a"); + FieldQuery fq = new FieldQuery( query, true, true ); + FieldTermStack stack = new FieldTermStack( reader, 0, F, fq ); + FieldPhraseList fpl = new FieldPhraseList( stack, fq, limit); + if (limit < 0 || limit > 16000) + assertEquals( 16000, fpl.phraseList.size() ); + else + assertEquals( limit, fpl.phraseList.size() ); + assertEquals( "a(1.0)((0,1))", fpl.phraseList.get( 0 ).toString() ); + } + */ } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (revision 1139974) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java (working copy) @@ -35,6 +35,7 @@ private final boolean fieldMatch; private final FragListBuilder fragListBuilder; private final FragmentsBuilder fragmentsBuilder; + private int phraseLimit = Integer.MAX_VALUE; /** * the default constructor. @@ -173,7 +174,7 @@ final FieldQuery fieldQuery, IndexReader reader, int docId, String fieldName, int fragCharSize ) throws IOException { FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery ); - FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery ); + FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit ); return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize ); } @@ -190,4 +191,15 @@ * @return whether fieldMatch or not */ public boolean isFieldMatch(){ return fieldMatch; } + + /** + * @return the maximum number of phrases to analyze when searching for the highest-scoring phrase. + */ + public int getPhraseLimit () { return phraseLimit; } + + /** + * set the maximum number of phrases to analyze when searching for the highest-scoring phrase. + * The default is 5000. To ensure that all phrases are analyzed, use a negative number or Integer.MAX_VALUE. + */ + public void setPhraseLimit (int phraseLimit) { this.phraseLimit = phraseLimit; } } Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java =================================================================== --- lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (revision 1139974) +++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (working copy) @@ -30,21 +30,32 @@ public class FieldPhraseList { LinkedList phraseList = new LinkedList(); - + /** + * create a FieldPhraseList that has no limit on the number of phrases to analyze + * + * @param fieldTermStack FieldTermStack object + * @param fieldQuery FieldQuery object + */ + public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery){ + this (fieldTermStack, fieldQuery, Integer.MAX_VALUE); + } + + /** * a constructor. * * @param fieldTermStack FieldTermStack object * @param fieldQuery FieldQuery object + * @param phraseLimit maximum size of phraseList */ - public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery ){ + public FieldPhraseList( FieldTermStack fieldTermStack, FieldQuery fieldQuery, int phraseLimit){ final String field = fieldTermStack.getFieldName(); LinkedList phraseCandidate = new LinkedList(); QueryPhraseMap currMap = null; QueryPhraseMap nextMap = null; - while( !fieldTermStack.isEmpty() ){ - + while( !fieldTermStack.isEmpty() && (phraseList.size() < phraseLimit) ) + { phraseCandidate.clear(); TermInfo ti = fieldTermStack.pop();