Index: lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestPerFieldAnalzyerWrapper.java	(working copy)
@@ -29,16 +29,17 @@
     TokenStream tokenStream = analyzer.tokenStream("field",
         new StringReader(text));
-    Token token = tokenStream.next();
+    Token token = new Token();
+    token = tokenStream.next(token);
     assertEquals("WhitespaceAnalyzer does not lowercase",
                  "Qwerty",
-                 token.termText());
+                 token.term());
 
     tokenStream = analyzer.tokenStream("special",
         new StringReader(text));
-    token = tokenStream.next();
+    token = tokenStream.next(token);
     assertEquals("SimpleAnalyzer lowercases",
                  "qwerty",
-                 token.termText());
+                 token.term());
   }
 }
Index: lucene/src/test/org/apache/lucene/analysis/TeeSinkTokenTest.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TeeSinkTokenTest.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TeeSinkTokenTest.java	(working copy)
@@ -63,23 +63,22 @@
     SinkTokenizer sink1 = new SinkTokenizer(null) {
       public void add(Token t) {
-        if (t != null && t.termText().equalsIgnoreCase("The")) {
+        if (t != null && t.term().equalsIgnoreCase("The")) {
           super.add(t);
         }
       }
     };
     TokenStream source = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), sink1);
-    Token token = null;
     int i = 0;
-    while ((token = source.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + tokens1[i], token.termText().equals(tokens1[i]) == true);
+    for (Token token = source.next(new Token()); token != null; token = source.next(token)) {
+      assertTrue(token.term() + " is not equal to " + tokens1[i], token.term().equals(tokens1[i]) == true);
       i++;
     }
     assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
     assertTrue("sink1 Size: " + sink1.getTokens().size() + " is not: " + 2, sink1.getTokens().size() == 2);
     i = 0;
-    while ((token = sink1.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + "The", token.termText().equalsIgnoreCase("The") == true);
+    for (Token token = sink1.next(new Token()); token != null; token = sink1.next(token)) {
+      assertTrue(token.term() + " is not equal to " + "The", token.term().equalsIgnoreCase("The") == true);
       i++;
     }
     assertTrue(i + " does not equal: " + sink1.getTokens().size(), i == sink1.getTokens().size());
@@ -88,54 +87,53 @@
   public void testMultipleSources() throws Exception {
     SinkTokenizer theDetector = new SinkTokenizer(null) {
       public void add(Token t) {
-        if (t != null && t.termText().equalsIgnoreCase("The")) {
+        if (t != null && t.term().equalsIgnoreCase("The")) {
           super.add(t);
         }
       }
     };
     SinkTokenizer dogDetector = new SinkTokenizer(null) {
       public void add(Token t) {
-        if (t != null && t.termText().equalsIgnoreCase("Dogs")) {
+        if (t != null && t.term().equalsIgnoreCase("Dogs")) {
          super.add(t);
         }
       }
     };
     TokenStream source1 = new CachingTokenFilter(new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())), theDetector), dogDetector));
     TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())), theDetector), dogDetector);
-    Token token = null;
     int i = 0;
-    while ((token = source1.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + tokens1[i], token.termText().equals(tokens1[i]) == true);
+    for (Token token = source1.next(new Token()); token != null; token = source1.next(token)) {
+      assertTrue(token.term() + " is not equal to " + tokens1[i], token.term().equals(tokens1[i]) == true);
       i++;
     }
     assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
     assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 2, theDetector.getTokens().size() == 2);
     assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 1, dogDetector.getTokens().size() == 1);
     i = 0;
-    while ((token = source2.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + tokens2[i], token.termText().equals(tokens2[i]) == true);
+    for (Token token = source2.next(new Token()); token != null; token = source2.next(token)) {
+      assertTrue(token.term() + " is not equal to " + tokens2[i], token.term().equals(tokens2[i]) == true);
       i++;
     }
     assertTrue(i + " does not equal: " + tokens2.length, i == tokens2.length);
     assertTrue("theDetector Size: " + theDetector.getTokens().size() + " is not: " + 4, theDetector.getTokens().size() == 4);
     assertTrue("dogDetector Size: " + dogDetector.getTokens().size() + " is not: " + 2, dogDetector.getTokens().size() == 2);
     i = 0;
-    while ((token = theDetector.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + "The", token.termText().equalsIgnoreCase("The") == true);
+    for (Token token = theDetector.next(new Token()); token != null; token = theDetector.next(token)) {
+      assertTrue(token.term() + " is not equal to " + "The", token.term().equalsIgnoreCase("The") == true);
       i++;
     }
     assertTrue(i + " does not equal: " + theDetector.getTokens().size(), i == theDetector.getTokens().size());
     i = 0;
-    while ((token = dogDetector.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + "Dogs", token.termText().equalsIgnoreCase("Dogs") == true);
+    for (Token token = dogDetector.next(new Token()); token != null; token = dogDetector.next(token)) {
+      assertTrue(token.term() + " is not equal to " + "Dogs", token.term().equalsIgnoreCase("Dogs") == true);
       i++;
     }
     assertTrue(i + " does not equal: " + dogDetector.getTokens().size(), i == dogDetector.getTokens().size());
     source1.reset();
     TokenStream lowerCasing = new LowerCaseFilter(source1);
     i = 0;
-    while ((token = lowerCasing.next()) != null) {
-      assertTrue(token.termText() + " is not equal to " + tokens1[i].toLowerCase(), token.termText().equals(tokens1[i].toLowerCase()) == true);
+    for (Token token = lowerCasing.next(new Token()); token != null; token = lowerCasing.next(token)) {
+      assertTrue(token.term() + " is not equal to " + tokens1[i].toLowerCase(), token.term().equals(tokens1[i].toLowerCase()) == true);
       i++;
     }
     assertTrue(i + " does not equal: " + tokens1.length, i == tokens1.length);
@@ -172,7 +170,7 @@
     for (int i = 0; i < tmp.size(); i++) {
       Token tfTok = (Token) tmp.get(i);
       Token sinkTok = (Token) sinkList.get(i);
-      assertTrue(tfTok.termText() + " is not equal to " + sinkTok.termText() + " at token: " + i, tfTok.termText().equals(sinkTok.termText()) == true);
+      assertTrue(tfTok.term() + " is not equal to " + sinkTok.term() + " at token: " + i, tfTok.term().equals(sinkTok.term()) == true);
     }
 
     //simulate two fields, each being analyzed once, for 20 documents
@@ -254,7 +252,7 @@
     public void add(Token t) {
       if (t != null && count % modCount == 0) {
-        lst.add(t.clone());
+        super.add(t);
       }
       count++;
     }
Index: lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java	(working copy)
@@ -42,11 +42,16 @@
     TokenStream stream = new TokenStream() {
       private int index = 0;
 
-      public Token next() throws IOException {
+      public Token next(Token token) throws IOException {
         if (index == tokens.length) {
           return null;
         } else {
-          return new Token(tokens[index++], 0, 0);
+          token.clear();
+          token.setTermBuffer(tokens[index++]);
+          token.setStartOffset(0);
+          token.setEndOffset(0);
+          token.setType(Token.DEFAULT_TYPE);
+          return token;
         }
       }
@@ -91,10 +96,9 @@
   private void checkTokens(TokenStream stream) throws IOException {
     int count = 0;
-    Token token;
-    while ((token = stream.next()) != null) {
+    for (Token token = stream.next(new Token()); token != null; token = stream.next(token)) {
       assertTrue(count < tokens.length);
-      assertEquals(tokens[count], token.termText());
+      assertEquals(tokens[count], token.term());
       count++;
     }
Index: lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java	(working copy)
@@ -35,10 +35,11 @@
   public void assertAnalyzesTo(Analyzer a, String input, String[] expectedImages, String[] expectedTypes, int[] expectedPosIncrs) throws Exception {
     TokenStream ts = a.tokenStream("dummy", new StringReader(input));
+    Token t = new Token();
     for (int i = 0; i < expectedImages.length; i++) {
-      Token t = ts.next();
+      t = ts.next(t);
       assertNotNull(t);
-      assertEquals(expectedImages[i], t.termText());
+      assertEquals(expectedImages[i], t.term());
       if (expectedTypes != null) {
         assertEquals(expectedTypes[i], t.type());
       }
@@ -46,7 +47,7 @@
         assertEquals(expectedPosIncrs[i], t.getPositionIncrement());
       }
     }
-    assertNull(ts.next());
+    assertNull(ts.next(t));
     ts.close();
   }
Index: lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java	(working copy)
@@ -25,77 +25,78 @@
   public void testU() throws Exception {
     TokenStream stream = new WhitespaceTokenizer(new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ"));
     ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
-    assertEquals("Des", filter.next().termText());
-    assertEquals("mot", filter.next().termText());
-    assertEquals("cles", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("LA", filter.next().termText());
-    assertEquals("CHAINE", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("A", filter.next().termText());
-    assertEquals("AE", filter.next().termText());
-    assertEquals("C", filter.next().termText());
-    assertEquals("E", filter.next().termText());
-    assertEquals("E", filter.next().termText());
-    assertEquals("E", filter.next().termText());
-    assertEquals("E", filter.next().termText());
-    assertEquals("I", filter.next().termText());
-    assertEquals("I", filter.next().termText());
-    assertEquals("I", filter.next().termText());
-    assertEquals("I", filter.next().termText());
-    assertEquals("D", filter.next().termText());
-    assertEquals("N", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("O", filter.next().termText());
-    assertEquals("OE", filter.next().termText());
-    assertEquals("TH", filter.next().termText());
-    assertEquals("U", filter.next().termText());
-    assertEquals("U", filter.next().termText());
-    assertEquals("U", filter.next().termText());
-    assertEquals("U", filter.next().termText());
-    assertEquals("Y", filter.next().termText());
-    assertEquals("Y", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("a", filter.next().termText());
-    assertEquals("ae", filter.next().termText());
-    assertEquals("c", filter.next().termText());
-    assertEquals("e", filter.next().termText());
-    assertEquals("e", filter.next().termText());
-    assertEquals("e", filter.next().termText());
-    assertEquals("e", filter.next().termText());
-    assertEquals("i", filter.next().termText());
-    assertEquals("i", filter.next().termText());
-    assertEquals("i", filter.next().termText());
-    assertEquals("i", filter.next().termText());
-    assertEquals("d", filter.next().termText());
-    assertEquals("n", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("o", filter.next().termText());
-    assertEquals("oe", filter.next().termText());
-    assertEquals("ss", filter.next().termText());
-    assertEquals("th", filter.next().termText());
-    assertEquals("u", filter.next().termText());
-    assertEquals("u", filter.next().termText());
-    assertEquals("u", filter.next().termText());
-    assertEquals("u", filter.next().termText());
-    assertEquals("y", filter.next().termText());
-    assertEquals("y", filter.next().termText());
-    assertNull(filter.next());
+    Token token = new Token();
+    assertEquals("Des", filter.next(token).term());
+    assertEquals("mot", filter.next(token).term());
+    assertEquals("cles", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("LA", filter.next(token).term());
+    assertEquals("CHAINE", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("A", filter.next(token).term());
+    assertEquals("AE", filter.next(token).term());
+    assertEquals("C", filter.next(token).term());
+    assertEquals("E", filter.next(token).term());
+    assertEquals("E", filter.next(token).term());
+    assertEquals("E", filter.next(token).term());
+    assertEquals("E", filter.next(token).term());
+    assertEquals("I", filter.next(token).term());
+    assertEquals("I", filter.next(token).term());
+    assertEquals("I", filter.next(token).term());
+    assertEquals("I", filter.next(token).term());
+    assertEquals("D", filter.next(token).term());
+    assertEquals("N", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("O", filter.next(token).term());
+    assertEquals("OE", filter.next(token).term());
+    assertEquals("TH", filter.next(token).term());
+    assertEquals("U", filter.next(token).term());
+    assertEquals("U", filter.next(token).term());
+    assertEquals("U", filter.next(token).term());
+    assertEquals("U", filter.next(token).term());
+    assertEquals("Y", filter.next(token).term());
+    assertEquals("Y", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("a", filter.next(token).term());
+    assertEquals("ae", filter.next(token).term());
+    assertEquals("c", filter.next(token).term());
+    assertEquals("e", filter.next(token).term());
+    assertEquals("e", filter.next(token).term());
+    assertEquals("e", filter.next(token).term());
+    assertEquals("e", filter.next(token).term());
+    assertEquals("i", filter.next(token).term());
+    assertEquals("i", filter.next(token).term());
+    assertEquals("i", filter.next(token).term());
+    assertEquals("i", filter.next(token).term());
+    assertEquals("d", filter.next(token).term());
+    assertEquals("n", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("o", filter.next(token).term());
+    assertEquals("oe", filter.next(token).term());
+    assertEquals("ss", filter.next(token).term());
+    assertEquals("th", filter.next(token).term());
+    assertEquals("u", filter.next(token).term());
+    assertEquals("u", filter.next(token).term());
+    assertEquals("u", filter.next(token).term());
+    assertEquals("u", filter.next(token).term());
+    assertEquals("y", filter.next(token).term());
+    assertEquals("y", filter.next(token).term());
+    assertNull(filter.next(token));
   }
 }
Index: lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestLengthFilter.java	(working copy)
@@ -27,10 +27,11 @@
     TokenStream stream = new WhitespaceTokenizer(
         new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
     LengthFilter filter = new LengthFilter(stream, 2, 6);
-    assertEquals("short", filter.next().termText());
-    assertEquals("ab", filter.next().termText());
-    assertEquals("foo", filter.next().termText());
-    assertNull(filter.next());
+    Token token = new Token();
+    assertEquals("short", filter.next(token).term());
+    assertEquals("ab", filter.next(token).term());
+    assertEquals("foo", filter.next(token).term());
+    assertNull(filter.next(token));
   }
 }
Index: lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java
===================================================================
--- lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java	(revision 682416)
+++ lucene/src/test/org/apache/lucene/analysis/TestAnalyzers.java	(working copy)
@@ -17,13 +17,14 @@
  * limitations under the License.
  */
 
-import java.io.*;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.LinkedList;
 import java.util.List;
-import java.util.LinkedList;
 
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.index.Payload;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 public class TestAnalyzers extends LuceneTestCase {
 
@@ -35,12 +36,13 @@
                      String input, 
                      String[] output) throws Exception {
     TokenStream ts = a.tokenStream("dummy", new StringReader(input));
+    Token t = new Token();
     for (int i=0; i

     log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
     stpf.setEnablePositionIncrements(enableIcrements);
+    Token t = new Token();
     for (int i=0; i<20; i+=3) {
-      Token t = stpf.next();
+      t = stpf.next(t);
       log("Token "+i+": "+t);
       String w = English.intToEnglish(i).trim();
-      assertEquals("expecting token "+i+" to be "+w,w,t.termText());
+      assertEquals("expecting token "+i+" to be "+w,w,t.term());
       assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,t.getPositionIncrement());
     }
-    assertNull(stpf.next());
+    assertNull(stpf.next(t));
   }
 
   // print debug info depending on VERBOSE
Index: lucene/src/java/org/apache/lucene/analysis/SinkTokenizer.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/SinkTokenizer.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/SinkTokenizer.java	(working copy)
@@ -22,11 +22,11 @@
   }
 
   public SinkTokenizer() {
-    this.lst = new ArrayList();
+    this.lst = new ArrayList/*<Token>*/();
   }
 
   public SinkTokenizer(int initCap){
-    this.lst = new ArrayList(initCap);
+    this.lst = new ArrayList/*<Token>*/(initCap);
   }
 
   /**
@@ -35,6 +35,8 @@
    * WARNING: Adding tokens to this list requires the {@link #reset()} method to be called in order for them
    * to be made available.  Also, this Tokenizer does nothing to protect against {@link java.util.ConcurrentModificationException}s
    * in the case of adds happening while {@link #next(org.apache.lucene.analysis.Token)} is being called.
+   * <p/>
+   * WARNING: Since this SinkTokenizer can be reset and the cached tokens made available again, do not modify them. Modify clones instead.
    *
    * @return A List of {@link org.apache.lucene.analysis.Token}s
    */
@@ -47,9 +49,14 @@
    * @return The next {@link org.apache.lucene.analysis.Token} in the Sink.
    * @throws IOException
    */
-  public Token next() throws IOException {
+  public Token next(Token token) throws IOException {
     if (iter == null) iter = lst.iterator();
-    return iter.hasNext() ? (Token) iter.next() : null;
+    // Since this TokenStream can be reset we have to maintain the tokens as immutable
+    if (iter.hasNext()) {
+      token = (Token) iter.next();
+      return (Token) token.clone();
+    }
+    return null;
   }
Index: lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/CachingTokenFilter.java	(working copy)
@@ -40,11 +40,11 @@
     super(input);
   }
 
-  public Token next() throws IOException {
+  public Token next(Token token) throws IOException {
     if (cache == null) {
       // fill cache lazily
       cache = new LinkedList();
-      fillCache();
+      fillCache(token);
       iterator = cache.iterator();
     }
 
@@ -52,8 +52,9 @@
       // the cache is exhausted, return null
       return null;
     }
-
-    return (Token) iterator.next();
+    // Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
+    Token t = (Token) iterator.next();
+    return (Token) t.clone();
   }
 
   public void reset() throws IOException {
@@ -62,10 +63,9 @@
     }
   }
 
-  private void fillCache() throws IOException {
-    Token token;
-    while ( (token = input.next()) != null) {
-      cache.add(token);
+  private void fillCache(Token token) throws IOException {
+    for (token = input.next(token); token != null; token = input.next(token)) {
+      cache.add(token.clone());
     }
   }
Index: lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/CharTokenizer.java	(working copy)
@@ -81,9 +81,9 @@
         break;                           // return 'em
     }
 
-    token.termLength = length;
-    token.startOffset = start;
-    token.endOffset = start+length;
+    token.setTermLength(length);
+    token.setStartOffset(start);
+    token.setEndOffset(start+length);
     return token;
   }
Index: lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/PorterStemFilter.java	(working copy)
@@ -48,7 +48,7 @@
   public final Token next(Token result) throws IOException {
     result = input.next(result);
     if (result != null) {
-      if (stemmer.stem(result.termBuffer(), 0, result.termLength))
+      if (stemmer.stem(result.termBuffer(), 0, result.termLength()))
         result.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
       return result;
     } else
Index: lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/KeywordTokenizer.java	(working copy)
@@ -51,7 +51,7 @@
         if (upto == buffer.length)
           buffer = result.resizeTermBuffer(1+buffer.length);
       }
-      result.termLength = upto;
+      result.setTermLength(upto);
       return result;
     }
     return null;
Index: lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java	(revision 682416)
+++ lucene/src/java/org/apache/lucene/analysis/LowerCaseFilter.java	(working copy)
@@ -34,7 +34,7 @@
     if (result != null) {
 
       final char[] buffer = result.termBuffer();
-      final int length = result.termLength;
+      final int length = result.termLength();
       for(int i=0;i<length;i++)
         buffer[i] = Character.toLowerCase(buffer[i]);
 
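Note (not part of the patch): every hunk above migrates callers from the deprecated no-arg next()/termText() pair to the reuse-oriented next(Token)/term() API. A minimal consumption sketch, assuming the Lucene 2.4-era TokenStream API; the analyzer, field name, and sample text are placeholders:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;

public class ReuseTokenDemo {
  public static void main(String[] args) throws IOException {
    TokenStream stream = new WhitespaceAnalyzer()
        .tokenStream("field", new StringReader("The quick brown fox"));
    // Pass a reusable Token in, but always continue the loop with the Token
    // that next(Token) returns: a producer may hand back a different instance.
    // For example, SinkTokenizer and CachingTokenFilter above return clones so
    // their cached tokens stay unmodified across reset().
    for (Token token = stream.next(new Token()); token != null; token = stream.next(token)) {
      System.out.println(token.term());  // term() supersedes the deprecated termText()
    }
    stream.close();
  }
}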