Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (revision 803705) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java (working copy) @@ -123,6 +123,7 @@ * */ public boolean incrementToken() throws IOException { + clearAttributes(); /** how many character(s) has been stored in buffer */ while(true) { // loop until we find a non-empty token Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (revision 803705) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java (working copy) @@ -96,6 +96,7 @@ } public boolean incrementToken() throws IOException { + clearAttributes(); length = 0; start = offset; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (revision 803705) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java (working copy) @@ -64,6 +64,7 @@ Token clone = (Token) singleToken.clone(); + clearAttributes(); termAtt.setTermBuffer(clone.termBuffer(), 0, clone.termLength()); offsetAtt.setOffset(clone.startOffset(), clone.endOffset()); flagsAtt.setFlags(clone.getFlags()); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java =================================================================== --- 
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (revision 803705) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (working copy) @@ -123,6 +123,7 @@ /** Returns the next token in the stream, or null at EOS. */ public final boolean incrementToken() throws IOException { + clearAttributes(); // if we are just starting, read the whole input if (!started) { started = true; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (revision 803705) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (working copy) @@ -72,6 +72,7 @@ /** Returns the next token in the stream, or null at EOS. */ public final boolean incrementToken() throws IOException { + clearAttributes(); if (!started) { started = true; gramSize = minGram; Index: contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java =================================================================== --- contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (revision 803705) +++ contrib/analyzers/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (working copy) @@ -54,6 +54,7 @@ } public boolean incrementToken() throws IOException { + clearAttributes(); buffer.setLength(0); int ci; char ch, pch; Index: contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java =================================================================== --- contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (revision 803705) +++ contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizer.java (working copy) @@ -184,6 +184,7 @@ 
restoreState(state); return true; } + clearAttributes(); int tokenType = scanner.getNextToken(); if (tokenType == WikipediaTokenizerImpl.YYEOF) { Index: src/java/org/apache/lucene/analysis/CharTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/CharTokenizer.java (revision 803705) +++ src/java/org/apache/lucene/analysis/CharTokenizer.java (working copy) @@ -53,9 +53,9 @@ } public final boolean incrementToken() throws IOException { + clearAttributes(); int length = 0; int start = bufferIndex; - termAtt.clear(); char[] buffer = termAtt.termBuffer(); while (true) { Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 803705) +++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy) @@ -49,6 +49,7 @@ public final boolean incrementToken() throws IOException { if (!done) { + clearAttributes(); done = true; int upto = 0; char[] buffer = termAtt.termBuffer(); Index: src/java/org/apache/lucene/analysis/NumericTokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 803705) +++ src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy) @@ -184,6 +184,7 @@ if (shift >= valSize) return false; + clearAttributes(); final char[] buffer; switch (valSize) { case 64: Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 803705) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -148,6 +148,7 @@ * @see org.apache.lucene.analysis.TokenStream#next() */ public final boolean incrementToken() throws IOException { + 
clearAttributes(); int posIncr = 1; while(true) { Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 803705) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -26,12 +26,16 @@
This is an abstract class.
- NOTE: To use the old API subclasses must override {@link #next(Token)}. - It's also OK to instead override {@link #next()} but that - method is slower compared to {@link #next(Token)}. + NOTE: subclasses must override + {@link #incrementToken()} if the new TokenStream API is used + and {@link #next(Token)} or {@link #next()} if the old + TokenStream API is used.
- NOTE: subclasses overriding {@link #next(Token)} must - call {@link Token#clear()}. + NOTE: Subclasses overriding {@link #incrementToken()} must + call {@link AttributeSource#clearAttributes()} before + setting attributes. + Subclasses overriding {@link #next(Token)} must call + {@link Token#clear()} before setting Token attributes. */ public abstract class Tokenizer extends TokenStream { @@ -85,6 +89,9 @@ this.input = CharReader.get(input); } + /** Expert: Reset the tokenizer to a new CharStream. Typically, an + * analyzer (in its reusableTokenStream method) will use + * this to re-use a previously created tokenizer. */ public void reset(CharStream input) throws IOException { this.input = input; } Index: src/java/org/apache/lucene/analysis/TokenWrapper.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenWrapper.java (revision 803705) +++ src/java/org/apache/lucene/analysis/TokenWrapper.java (working copy) @@ -122,6 +122,7 @@ } // PayloadAttribute + public Payload getPayload() { return delegate.getPayload(); } @@ -130,14 +131,13 @@ delegate.setPayload(payload); } - // TokenAttribute - + // AttributeImpl + public void clear() { delegate.clear(); + // TODO: clear offset and type } - // AttributeImpl - public String toString() { return delegate.toString(); } Index: src/java/org/apache/lucene/util/Attribute.java =================================================================== --- src/java/org/apache/lucene/util/Attribute.java (revision 803705) +++ src/java/org/apache/lucene/util/Attribute.java (working copy) @@ -21,5 +21,4 @@ * Base interface for attributes. 
*/ public interface Attribute { - public void clear(); } Index: src/java/org/apache/lucene/util/AttributeImpl.java =================================================================== --- src/java/org/apache/lucene/util/AttributeImpl.java (revision 803705) +++ src/java/org/apache/lucene/util/AttributeImpl.java (working copy) @@ -30,8 +30,9 @@ */ public abstract class AttributeImpl implements Cloneable, Serializable { /** - * Clears the values in this Attribute and resets it to its - * default value. + * Clears the values in this AttributeImpl and resets it to its + * default value. If this implementation implements more than one Attribute interface, + * it clears all of them. */ public abstract void clear();