Index: src/java/org/apache/lucene/analysis/CharTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/CharTokenizer.java (revision 606689) +++ src/java/org/apache/lucene/analysis/CharTokenizer.java (working copy) @@ -45,6 +45,7 @@ } public final Token next(Token token) throws IOException { + token.clear(); int length = 0; int start = bufferIndex; char[] buffer = token.termBuffer(); Index: src/java/org/apache/lucene/analysis/KeywordTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/KeywordTokenizer.java (revision 606689) +++ src/java/org/apache/lucene/analysis/KeywordTokenizer.java (working copy) @@ -42,6 +42,7 @@ if (!done) { done = true; int upto = 0; + result.clear(); char[] buffer = result.termBuffer(); while (true) { final int length = input.read(buffer, upto, buffer.length-upto); Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 606689) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -68,6 +68,7 @@ return null; } + result.clear(); scanner.getText(result); final int start = scanner.yychar(); result.setStartOffset(start); Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 606865) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -23,8 +23,12 @@ /** A Tokenizer is a TokenStream whose input is a Reader.

This is an abstract class. +

NOTE: subclasses must override at least one of {@link #next()} or {@link #next(Token)}. +

+ NOTE: subclasses overriding {@link #next(Token)} must + call {@link Token#clear()}. */ public abstract class Tokenizer extends TokenStream {