Index: src/java/org/apache/lucene/analysis/CharTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/CharTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/CharTokenizer.java (working copy) @@ -45,6 +45,7 @@ } public final Token next(Token token) throws IOException { + token.clear(); int length = 0; int start = bufferIndex; char[] buffer = token.termBuffer(); Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -23,8 +23,12 @@ /** A Tokenizer is a TokenStream whose input is a Reader.

This is an abstract class. +
<p>
NOTE: subclasses must override at least one of {@link #next()} or {@link #next(Token)}. +
<p>
+ NOTE: subclasses overriding {@link #next(Token)} must + call {@link Token#clear()}. */ public abstract class Tokenizer extends TokenStream { Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy) @@ -42,6 +42,7 @@ if (!done) { done = true; int upto = 0; + result.clear(); char[] buffer = result.termBuffer(); while (true) { final int length = input.read(buffer, upto, buffer.length-upto); Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -92,6 +92,7 @@ return null; } + result.clear(); scanner.getText(result); final int start = scanner.yychar(); result.setStartOffset(start); Index: src/java/org/apache/lucene/analysis/TokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenStream.java (revision 607519) +++ src/java/org/apache/lucene/analysis/TokenStream.java (working copy) @@ -58,14 +58,23 @@ * When possible, the input Token should be used as the * returned Token (this gives fastest tokenization * performance), but this is not required and a new Token - * may be returned. Callers may re-use a single Token - * instance for successive calls to this method and must - * therefore fully consume the previously returned Token - * before calling this method again. - * @param result a Token that may or may not be used to - * return - * @return next token in the stream or null if - * end-of-stream was hit*/ + * may be returned. 
Callers may re-use a single Token + * instance for successive calls to this method. + *
<p>
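+ * This implicitly defines a "contract" between + * consumers (callers of this method) and + * producers (implementations of this method + * that are the source for tokens): + *
<ul> + * <li>A consumer must fully consume the previously + * returned Token before calling this method again.</li> + * <li>A producer must call {@link Token#clear()} + * before setting the fields in it and returning it.</li> + * </ul>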
+ * Note that a {@link TokenFilter} is considered a consumer. + * @param result a Token that may or may not be used to return + * @return next token in the stream or null if end-of-stream was hit + */ public Token next(Token result) throws IOException { return next(); } Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 607519) +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -1373,7 +1373,6 @@ offsetEnd = offset-1; Token token; for(;;) { - localToken.clear(); token = stream.next(localToken); if (token == null) break; position += (token.getPositionIncrement() - 1);