Index: src/java/org/apache/lucene/analysis/CharTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/CharTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/CharTokenizer.java (working copy) @@ -45,6 +45,7 @@ } public final Token next(Token token) throws IOException { + token.clear(); int length = 0; int start = bufferIndex; char[] buffer = token.termBuffer(); Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -23,8 +23,12 @@ /** A Tokenizer is a TokenStream whose input is a Reader.
This is an abstract class. + <p>
NOTE: subclasses must override at least one of {@link #next()} or {@link #next(Token)}. + <p>
+ NOTE: subclasses overriding {@link #next(Token)} must + call {@link Token#clear()}. */ public abstract class Tokenizer extends TokenStream { Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy) @@ -42,6 +42,7 @@ if (!done) { done = true; int upto = 0; + result.clear(); char[] buffer = result.termBuffer(); while (true) { final int length = input.read(buffer, upto, buffer.length-upto); Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 607519) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -92,6 +92,7 @@ return null; } + result.clear(); scanner.getText(result); final int start = scanner.yychar(); result.setStartOffset(start); Index: src/java/org/apache/lucene/analysis/TokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenStream.java (revision 607519) +++ src/java/org/apache/lucene/analysis/TokenStream.java (working copy) @@ -58,14 +58,23 @@ * When possible, the input Token should be used as the * returned Token (this gives fastest tokenization * performance), but this is not required and a new Token - * may be returned. Callers may re-use a single Token - * instance for successive calls to this method and must - * therefore fully consume the previously returned Token - * before calling this method again. - * @param result a Token that may or may not be used to - * return - * @return next token in the stream or null if - * end-of-stream was hit*/ + * may be returned. 
Callers may re-use a single Token + * instance for successive calls to this method. + * <p>
+ * This implicitly defines a "contract" between + * consumers (callers of this method) and + * producers (implementations of this method + * that are the source for tokens): + *