Index: src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestAnalyzers.java	(revision 596776)
+++ src/test/org/apache/lucene/analysis/TestAnalyzers.java	(working copy)
@@ -118,6 +118,33 @@
     verifyPayload(ts);
   }
 
+  // Make sure that tokenizers that define the non-reuse
+  // next() API do not see their returned tokens changed by
+  // subsequent filters that use the re-use next(Token) API:
+  public void testTokenReuse() throws IOException {
+    TokenStream source = new TokenStream() {
+
+      private Token theToken;
+
+      // Hands out the same cached token "A" on every call
+      public Token next() {
+        if (theToken == null)
+          theToken = new Token("A", 0, 3);
+        // Fails on a later call if a downstream filter
+        // modified the cached token in place
+        char[] termBuffer = theToken.termBuffer();
+        assertEquals('A', termBuffer[0]);
+        return theToken;
+      }
+    };
+
+    TokenStream filtered = new LowerCaseFilter(source);
+
+    // Pull twice: the second call is expected to reach
+    // source.next() again, which re-checks the cached token
+    filtered.next();
+    filtered.next();
+  }
 }
 
 class BuffTokenFilter extends TokenFilter {
Index: src/java/org/apache/lucene/analysis/Token.java
===================================================================
--- src/java/org/apache/lucene/analysis/Token.java	(revision 596776)
+++ src/java/org/apache/lucene/analysis/Token.java	(working copy)
@@ -386,4 +386,26 @@
       throw new RuntimeException(e); // shouldn't happen
     }
   }
+
+  /** Make a full copy of this token into the destination
+   *  token, and return the destination token. The payload,
+   *  if any, is placed in a new Payload instance instead of
+   *  being shared between the two tokens.
+   *  @param dest the Token to overwrite; must not be null
+   *  @return dest */
+  Token copyTo(Token dest) {
+    initTermBuffer();
+    dest.setTermBuffer(termBuffer, 0, termLength);
+    dest.startOffset = startOffset;
+    dest.endOffset = endOffset;
+    dest.type = type;
+    dest.positionIncrement = positionIncrement;
+    if (payload == null) {
+      dest.payload = null;
+    } else {
+      // Never hand dest a reference to this token's own Payload
+      dest.setPayload(new Payload(payload.toByteArray(), 0, payload.length()));
+    }
+    return dest;
+  }
 }
Index: src/java/org/apache/lucene/analysis/TokenStream.java
===================================================================
--- src/java/org/apache/lucene/analysis/TokenStream.java	(revision 596776)
+++ src/java/org/apache/lucene/analysis/TokenStream.java	(working copy)
@@ -38,9 +38,11 @@
 public abstract class TokenStream {
 
   /** Returns the next token in the stream, or null at EOS.
-   *  The returned Token is a "full private copy" (not
-   *  re-used across calls to next()) but will be slower
-   *  than calling {@link #next(Token)} instead.. */
+   *  The returned Token is safe to store away indefinitely
+   *  (will not be changed on future calls to next), and you
+   *  must not alter any of the fields in the Token. This is
+   *  in general slower than calling
+   *  {@link #next(Token)}. */
   public Token next() throws IOException {
     Token result = next(new Token());
 
@@ -56,17 +58,28 @@
   /** Returns the next token in the stream, or null at EOS.
    *  When possible, the input Token should be used as the
    *  returned Token (this gives fastest tokenization
-   *  performance), but this is not required and a new Token
-   *  may be returned. Callers may re-use a single Token
-   *  instance for successive calls to this method and must
-   *  therefore fully consume the previously returned Token
-   *  before calling this method again.
+   *  performance) but this is not required. Callers may
+   *  re-use a single Token instance for successive calls to
+   *  this method and must therefore fully consume the
+   *  previously returned Token before calling this method
+   *  again. Furthermore, it is OK to modify the fields in
+   *  the returned Token. This default implementation
+   *  copies the Token from {@link #next()} into
+   *  <code>result</code>; since {@link #next()} in turn
+   *  calls this method, a subclass must override at least
+   *  one of the two.
    *  @param result a Token that may or may not be used to
    *   return
    *  @return next token in the stream or null if
    *   end-of-stream was hit*/
   public Token next(Token result) throws IOException {
-    return next();
+    Token t = next();
+    if (t == null) {
+      return null;
+    }
+    // We must make a copy here because our caller is
+    // allowed to modify the fields in the returned token
+    return t.copyTo(result);
   }
 
   /** Resets this stream to the beginning. This is an