Index: src/java/org/apache/lucene/analysis/Tokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/Tokenizer.java (revision 786929) +++ src/java/org/apache/lucene/analysis/Tokenizer.java (working copy) @@ -45,14 +45,14 @@ public abstract class Tokenizer extends TokenStream { /** The text source for this Tokenizer. */ - protected Reader input; + protected CharStream input; /** Construct a tokenizer with null input. */ protected Tokenizer() {} /** Construct a token stream processing the given input. */ protected Tokenizer(Reader input) { - this.input = input; + this.input = CharReader.get(input); } /** By default, closes the input Reader. */ @@ -64,7 +64,7 @@ * analyzer (in its reusableTokenStream method) will use * this to re-use a previously created tokenizer. */ public void reset(Reader input) throws IOException { - this.input = input; + this.input = CharReader.get(input); } } Index: src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java =================================================================== --- src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 786929) +++ src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy) @@ -26,6 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.analysis.CharReader; +import org.apache.lucene.analysis.CharStream; /** A grammar-based tokenizer constructed with JFlex * @@ -91,7 +93,7 @@ private boolean replaceInvalidAcronym; void setInput(Reader reader) { - this.input = reader; + this.input = CharReader.get(reader); } private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH; @@ -126,7 +128,7 @@ */ public StandardTokenizer(Reader input, boolean replaceInvalidAcronym) { this.replaceInvalidAcronym = replaceInvalidAcronym; - this.input = input; + setInput(input); this.scanner = new StandardTokenizerImpl(input); termAtt = (TermAttribute) addAttribute(TermAttribute.class); offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); @@ -240,7 +242,7 @@ } public void reset(Reader reader) throws IOException { - input = reader; + setInput(reader); reset(); }