Index: src/java/org/apache/lucene/analysis/Analyzer.java =================================================================== --- src/java/org/apache/lucene/analysis/Analyzer.java (revision 803904) +++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy) @@ -34,18 +34,54 @@ * then be applied to the output of the Tokenizer. */ public abstract class Analyzer { + + /** + * Pairs a {@link Tokenizer} with a {@link TokenStream} so both can be reused by + * {@link Analyzer#reusableTokenStream(String, Reader)}. + * {@link #reset(CharStream)} and {@link #reset(Reader)} first pass the new input to the + * {@link Tokenizer} and then reset the {@link TokenStream}. + */ + protected static class Streams { + private final Tokenizer tokenizer; + private final TokenStream stream; + + public Streams(Tokenizer tokenizer, TokenStream stream) { + this.tokenizer = tokenizer; + this.stream = stream; + } + + public Tokenizer getTokenizer() { return tokenizer; } + + public TokenStream getTokenStream() { return stream; } + + public void reset(Reader reader) throws IOException { + tokenizer.reset(reader); + stream.reset(); + } + + public void reset(CharStream charstream) throws IOException { + tokenizer.reset(charstream); + stream.reset(); + } + + } + /** Creates a TokenStream which tokenizes all the text in the provided * Reader. Must be able to handle null field name for * backward compatibility. */ public abstract TokenStream tokenStream(String fieldName, Reader reader); - /** Creates a TokenStream that is allowed to be re-used - * from the previous time that the same thread called - * this method. Callers that do not need to use more - * than one TokenStream at the same time from this - * analyzer should use this method for better - * performance. + /** + * Creates a TokenStream that is allowed to be re-used from the previous time + * that the same thread called this method. 
Callers that do not need to use + * more than one TokenStream at the same time from this analyzer should use + * this method for better performance. + *
+ * NOTE: the default implementation returns a new instance of a + * {@link TokenStream} by calling {@link #tokenStream(String, Reader)}, for + * backwards compatibility. {@link ReusingAnalyzer} provides an implementation + * which reuses the {@link TokenStream} through {@link Streams}. */ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { return tokenStream(fieldName, reader); Index: src/java/org/apache/lucene/analysis/ReusingAnalyzer.java =================================================================== --- src/java/org/apache/lucene/analysis/ReusingAnalyzer.java (revision 0) +++ src/java/org/apache/lucene/analysis/ReusingAnalyzer.java (revision 0) @@ -0,0 +1,68 @@ +package org.apache.lucene.analysis; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +/** + * An {@link Analyzer} implementation which provides a default implementation + * for {@link #reusableTokenStream(String, Reader)}. Extending classes need to + * implement {@link #getTokenizer(Reader)} and + * {@link #internalTokenStream(String, Tokenizer)} only. 
+ * + * @since 2.9 + */ +public abstract class ReusingAnalyzer extends Analyzer { + + /** + * Creates a {@link TokenStream}, much like + * {@link #tokenStream(String, Reader)}, except that it is handed an already created {@link Tokenizer} + * instead of a {@link Reader}. This allows the {@link Tokenizer} as well as + * the {@link TokenStream} to be reused by {@link #reusableTokenStream(String, Reader)}, which calls this method only when no previous stream is available; later calls reset and return that stream without consulting the fieldName argument. + * + * @see #tokenStream(String, Reader) + */ + protected abstract TokenStream internalTokenStream(String fieldName, Tokenizer tokenizer); + + /** + * Creates a {@link Tokenizer} which tokenizes the input {@link Reader}. Used + * by {@link #reusableTokenStream(String, Reader)} and + * {@link #tokenStream(String, Reader)} for easy reuse of the + * {@link Tokenizer}. + */ + protected abstract Tokenizer getTokenizer(Reader reader); + + public TokenStream tokenStream(String fieldName, Reader reader) { + return internalTokenStream(fieldName, getTokenizer(reader)); + } + + public TokenStream reusableTokenStream(String fieldName, Reader reader) + throws IOException { + Streams streams = (Streams) getPreviousTokenStream(); + if (streams == null) { + Tokenizer tokenizer = getTokenizer(reader); + streams = new Streams(tokenizer, internalTokenStream(fieldName, tokenizer)); + setPreviousTokenStream(streams); + } else { + streams.reset(reader); + } + return streams.getTokenStream(); + } + +} Property changes on: src\java\org\apache\lucene\analysis\ReusingAnalyzer.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native