Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java	(working copy)
@@ -94,7 +94,7 @@
   /* (non-Javadoc)
    * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
    */
-  public TokenStream init(TokenStream tokenStream) {
+  public TokenStream init(TokenStream tokenStream, int maxDocCharsToAnalyze) {
     termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     return null;
   }
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Scorer.java	(working copy)
@@ -37,7 +37,7 @@
    * using the same {@link TokenStream} that was passed in.
    * @throws IOException
    */
-  public TokenStream init(TokenStream tokenStream) throws IOException;
+  public TokenStream init(TokenStream tokenStream, int maxDocCharsToAnalyze) throws IOException;
 
   /**
    * Called when a new fragment is started for consideration.
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java	(working copy)
@@ -197,7 +197,7 @@
     tokenStream.reset();
     TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
 
-    TokenStream newStream = fragmentScorer.init(tokenStream);
+    TokenStream newStream = fragmentScorer.init(tokenStream, maxDocCharsToAnalyze);
     if(newStream != null) {
       tokenStream = newStream;
     }
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java	(working copy)
@@ -28,6 +28,7 @@
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -56,6 +57,7 @@
   private boolean expandMultiTermQuery;
   private boolean cachedTokenStream;
   private boolean wrapToCaching = true;
+  private int maxDocCharsToAnalyze = Integer.MAX_VALUE; // no cap until setMaxDocCharsToAnalyze is called
 
   public WeightedSpanTermExtractor() {
   }
@@ -326,7 +328,7 @@
     AtomicReaderContext context = readers.get(field);
     if (context == null) {
       MemoryIndex indexer = new MemoryIndex();
-      indexer.addField(field, tokenStream);
+      indexer.addField(field, getLimitAnalyzer(tokenStream, maxDocCharsToAnalyze));
       tokenStream.reset();
       IndexSearcher searcher = indexer.createSearcher();
       // MEM index has only atomic ctx
@@ -517,6 +519,48 @@
 
   }
 
+  /**
+   * Wraps <code>tokenStream</code> so that consumers stop receiving tokens once
+   * the accumulated token length (endOffset - startOffset) reaches
+   * <code>limit</code>. end(), reset() and close() are forwarded to the wrapped
+   * stream.
+   */
+  private TokenStream getLimitAnalyzer(final TokenStream tokenStream, final int limit) {
+    return new TokenStream(tokenStream) {
+      // addAttribute, not getAttribute: getAttribute throws
+      // IllegalArgumentException when the stream carries no OffsetAttribute.
+      private final OffsetAttribute offsetAttrib = addAttribute(OffsetAttribute.class);
+      private long cnt; // long: summed token lengths may exceed the int range
+
+      @Override
+      public boolean incrementToken() throws IOException {
+        if (cnt >= limit || !tokenStream.incrementToken()) {
+          return false;
+        }
+        // Offsets are only meaningful after the wrapped stream has advanced.
+        cnt += offsetAttrib.endOffset() - offsetAttrib.startOffset();
+        return true;
+      }
+
+      @Override
+      public void end() throws IOException {
+        tokenStream.end();
+      }
+
+      @Override
+      public void reset() throws IOException {
+        tokenStream.reset();
+        cnt = 0; // a rewound stream gets a fresh budget
+      }
+
+      /** Releases resources associated with this stream. */
+      @Override
+      public void close() throws IOException {
+        tokenStream.close();
+      }
+    };
+  }
+
   public boolean getExpandMultiTermQuery() {
     return expandMultiTermQuery;
   }
@@ -545,4 +589,9 @@
   public void setWrapIfNotCachingTokenFilter(boolean wrap) {
     this.wrapToCaching = wrap;
   }
+
+  /** Sets the limit handed to getLimitAnalyzer when a token stream is indexed for span extraction. */
+  protected final void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
+    this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
+  }
 }
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java	(working copy)
@@ -54,6 +54,7 @@
   private IndexReader reader;
   private boolean skipInitExtractor;
   private boolean wrapToCaching = true;
+  private int maxCharsToAnalyze;
 
   /**
    * @param query Query to use for highlighting
@@ -173,7 +174,8 @@
   /* (non-Javadoc)
    * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
    */
-  public TokenStream init(TokenStream tokenStream) throws IOException {
+  public TokenStream init(TokenStream tokenStream, int maxDocCharsToAnalyze) throws IOException {
+    this.maxCharsToAnalyze = maxDocCharsToAnalyze;
     position = -1;
     termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
@@ -209,7 +211,7 @@
   private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
     WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
         : new WeightedSpanTermExtractor(defaultField);
-
+    qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
     qse.setExpandMultiTermQuery(expandMultiTermQuery);
     qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
     if (reader == null) {
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	(revision 1074855)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	(working copy)
@@ -1274,7 +1274,7 @@
             return 1;
           }
 
-          public TokenStream init(TokenStream tokenStream) {
+          public TokenStream init(TokenStream tokenStream, int maxCharsToAnalyze) {
             return null;
           }
         });