Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 899380) +++ CHANGES.txt (working copy) @@ -96,6 +96,11 @@ implementation class when interface was loaded by a different class loader. (Uwe Schindler, reported on java-user by Ahmed El-dawy) +* LUCENE-2211: Fix missing clearAttributes() calls in contrib: + ShingleMatrix, PrefixAware, compounds, NGramTokenFilter, + EdgeNGramTokenFilter, Highlighter, and MemoryIndex. + (Uwe Schindler, Robert Muir) + New features * LUCENE-2128: Parallelized fetching document frequencies during weight @@ -197,6 +202,10 @@ * LUCENE-2170: Fix thread starvation problems. (Uwe Schindler) +* LUCENE-2211: Improves BaseTokenStreamTestCase to use a fake attribute + that checks if clearAttributes() was called correctly. + (Uwe Schindler, Robert Muir) + ======================= Release 3.0.0 2009-11-25 ======================= Changes in backwards compatibility policy Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (revision 899380) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (working copy) @@ -188,6 +188,7 @@ } private final void setToken(final Token token) throws IOException { + clearAttributes(); termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); flagsAtt.setFlags(token.getFlags()); typeAtt.setType(token.type()); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (revision 899380) +++ 
contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java (working copy) @@ -114,6 +114,7 @@ private void setCurrentToken(Token token) { if (token == null) return; + clearAttributes(); termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (revision 899380) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (working copy) @@ -134,6 +134,7 @@ // grab gramSize chars from front or back int start = side == Side.FRONT ? 0 : curTermLength - curGramSize; int end = start + curGramSize; + clearAttributes(); offsetAtt.setOffset(start, end); termAtt.setTermBuffer(curTermBuffer, start, curGramSize); curGramSize++; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (revision 899380) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (working copy) @@ -87,6 +87,7 @@ } while (curGramSize <= maxGram) { while (curPos+curGramSize <= curTermLength) { // while there is input + clearAttributes(); termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize); offsetAtt.setOffset(curPos, curPos+curGramSize); curPos++; Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (revision 
899380) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (working copy) @@ -377,6 +377,7 @@ } while (token == request_next_token); if (token == null) return false; + clearAttributes(); termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (revision 899380) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java (working copy) @@ -19,10 +19,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import java.io.IOException; import java.io.StringReader; @@ -36,24 +33,12 @@ new WhitespaceTokenizer(new StringReader("hello world")), new SingleTokenTokenStream(createToken("$", 0, 0))); - assertNext(ts, "^", 0, 0); - assertNext(ts, "hello", 0, 5); - assertNext(ts, "world", 6, 11); - assertNext(ts, "$", 11, 11); - assertFalse(ts.incrementToken()); + assertTokenStreamContents(ts, + new String[] { "^", "hello", "world", "$" }, + new int[] { 0, 0, 6, 11 }, + new int[] { 0, 5, 11, 11 }); } - - private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - - assertTrue(ts.incrementToken()); - 
assertEquals(text, termAtt.term()); - assertEquals(startOffset, offsetAtt.startOffset()); - assertEquals(endOffset, offsetAtt.endOffset()); - } - private static Token createToken(String term, int start, int offset) { Token token = new Token(start, offset); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (revision 899380) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java (working copy) @@ -19,10 +19,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import java.io.IOException; import java.io.StringReader; @@ -36,33 +33,22 @@ ts = new PrefixAwareTokenFilter( new SingleTokenTokenStream(createToken("a", 0, 1)), new SingleTokenTokenStream(createToken("b", 0, 1))); - assertNext(ts, "a", 0, 1); - assertNext(ts, "b", 1, 2); - assertFalse(ts.incrementToken()); + assertTokenStreamContents(ts, + new String[] { "a", "b" }, + new int[] { 0, 1 }, + new int[] { 1, 2 }); // prefix and suffix using 2x prefix ts = new PrefixAwareTokenFilter(new SingleTokenTokenStream(createToken("^", 0, 0)), new WhitespaceTokenizer(new StringReader("hello world"))); ts = new PrefixAwareTokenFilter(ts, new SingleTokenTokenStream(createToken("$", 0, 0))); - assertNext(ts, "^", 0, 0); - assertNext(ts, "hello", 0, 5); - assertNext(ts, "world", 6, 11); - assertNext(ts, "$", 11, 11); - assertFalse(ts.incrementToken()); + assertTokenStreamContents(ts, + new String[] { "^", "hello", "world", "$" }, + new int[] { 0, 0, 6, 
11 }, + new int[] { 0, 5, 11, 11 }); } - - private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - - assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); - assertEquals(startOffset, offsetAtt.startOffset()); - assertEquals(endOffset, offsetAtt.endOffset()); - } - private static Token createToken(String term, int start, int offset) { Token token = new Token(start, offset); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (revision 899380) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (working copy) @@ -85,22 +85,12 @@ ts = new ShingleMatrixFilter(tls, 1, 2, new Character(' '), false, new ShingleMatrixFilter.OneDimensionalNonWeightedTokenSettingsCodec()); - - assertNext(ts, "please", 0, 6); - assertNext(ts, "please divide", 0, 13); - assertNext(ts, "divide", 7, 13); - assertNext(ts, "divide this", 7, 18); - assertNext(ts, "this", 14, 18); - assertNext(ts, "this sentence", 14, 27); - assertNext(ts, "sentence", 19, 27); - assertNext(ts, "sentence into", 19, 32); - assertNext(ts, "into", 28, 32); - assertNext(ts, "into shingles", 28, 39); - assertNext(ts, "shingles", 33, 39); - - - assertFalse(ts.incrementToken()); - + assertTokenStreamContents(ts, + new String[] { "please", "please divide", "divide", "divide this", + "this", "this sentence", "sentence", "sentence into", "into", + "into shingles", "shingles" }, + new int[] { 0, 0, 7, 7, 14, 14, 19, 19, 28, 28, 33 }, + new int[] { 6, 13, 13, 18, 18, 27, 27, 32, 32, 39, 39 }); } /** @@ -546,6 +536,7 @@ return false; } Token prototype = (Token) 
iterator.next(); + clearAttributes(); termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength()); posIncrAtt.setPositionIncrement(prototype.getPositionIncrement()); flagsAtt.setFlags(prototype.getFlags()); Index: contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java =================================================================== --- contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (revision 899380) +++ contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (working copy) @@ -205,7 +205,7 @@ public boolean incrementToken() throws IOException { if( !getNextPartialSnippet() ) return false; - + clearAttributes(); termAtt.setTermBuffer(snippet, startTerm, lenTerm); offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm)); return true; Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (revision 899380) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (working copy) @@ -169,6 +169,7 @@ return false; } Token token = tokens[currentToken++]; + clearAttributes(); termAtt.setTermBuffer(token.term()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); return true; Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (revision 899380) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java (working copy) @@ -102,6 +102,7 @@ public boolean incrementToken() throws 
IOException { if (this.tokensAtCurrentPosition.hasNext()) { final Token next = this.tokensAtCurrentPosition.next(); + clearAttributes(); termAttribute.setTermBuffer(next.term()); positionIncrementAttribute.setPositionIncrement(next .getPositionIncrement()); Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (revision 899380) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java (working copy) @@ -311,6 +311,7 @@ if (this.i >= this.tokens.length) { return false; } + clearAttributes(); termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i] .term().length()); offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i] @@ -355,6 +356,7 @@ if (this.i >= this.tokens.length) { return false; } + clearAttributes(); termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i] .term().length()); offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i] Index: contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 899380) +++ contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -1458,6 +1458,7 @@ public boolean incrementToken() throws IOException { if(iter.hasNext()) { Token token = iter.next(); + clearAttributes(); termAtt.setTermBuffer(token.term()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -1506,6 +1507,7 @@ public boolean incrementToken() throws IOException { if(iter.hasNext()) { Token token = iter.next(); + clearAttributes(); 
termAtt.setTermBuffer(token.term()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -1845,6 +1847,7 @@ return false; } //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset()); + clearAttributes(); termAtt.setTermBuffer(realTermAtt.term()); offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset()); posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement()); @@ -1862,6 +1865,7 @@ return true; } else { String tok = st.nextToken(); + clearAttributes(); termAtt.setTermBuffer(tok); offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset()); posIncrAtt.setPositionIncrement(0); Index: contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 899380) +++ contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -287,6 +287,7 @@ throw new IllegalArgumentException("keyword must not be null"); String term = obj.toString(); + clearAttributes(); termAtt.setTermBuffer(term); offsetAtt.setOffset(start, start+termAtt.termLength()); start += term.length() + 1; // separate words by 1 (blank) character Index: contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java =================================================================== --- contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (revision 899380) +++ contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (working copy) @@ -73,6 +73,7 @@ @Override public boolean incrementToken() throws IOException { + clearAttributes(); if (inPhrase) { inPhrase = false; termAtt.setTermBuffer("phrase2"); Index: 
contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java =================================================================== --- contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 899380) +++ contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy) @@ -119,6 +119,7 @@ public boolean incrementToken() throws IOException { if (inPhrase) { inPhrase = false; + clearAttributes(); termAtt.setTermBuffer("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; @@ -1163,6 +1164,7 @@ final TermAttribute term = addAttribute(TermAttribute.class); @Override public boolean incrementToken() { + clearAttributes(); if (upto == 4) { return false; } Index: contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java =================================================================== --- contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (revision 899380) +++ contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (working copy) @@ -114,6 +114,7 @@ public boolean incrementToken() throws IOException { if (inPhrase) { inPhrase = false; + clearAttributes(); termAtt.setTermBuffer("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; Index: contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java =================================================================== --- contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (revision 899380) +++ contrib/snowball/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java (working copy) @@ -131,6 +131,7 @@ @Override public boolean incrementToken() { + clearAttributes(); termAtt.setTermBuffer("accents"); offsetAtt.setOffset(2, 7); typeAtt.setType("wrd"); Index: 
contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java =================================================================== --- contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java (revision 899380) +++ contrib/wikipedia/src/test/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerTest.java (working copy) @@ -18,8 +18,6 @@ package org.apache.lucene.wikipedia.analysis; -import junit.framework.TestCase; - import java.io.StringReader; import java.io.IOException; import java.util.HashMap; @@ -46,6 +44,12 @@ super(s); } + public void testSimple() throws Exception { + WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader("This is a [[Category:foo]]")); + assertTokenStreamContents(tf, + new String[] { "This", "is", "a", "foo" }); + } + public void testHandwritten() throws Exception { //make sure all tokens are in only one type String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " + Index: src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java =================================================================== --- src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 899380) +++ src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy) @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.LuceneTestCase; /** @@ -38,26 +40,67 @@ // some helpers to test Analyzers and TokenStreams: + public static interface CheckClearAttributesAttribute extends Attribute { + boolean getAndResetClearCalled(); + } + + public static final class CheckClearAttributesAttributeImpl extends AttributeImpl implements CheckClearAttributesAttribute { + private boolean clearCalled = false; + + public boolean getAndResetClearCalled() { + try { + 
return clearCalled; + } finally { + clearCalled = false; + } + } + + @Override + public void clear() { + clearCalled = true; + } + + @Override + public boolean equals(Object other) { + return ( + other instanceof CheckClearAttributesAttributeImpl && + ((CheckClearAttributesAttributeImpl) other).clearCalled == this.clearCalled + ); + } + + @Override + public int hashCode() { + return 76137213 ^ Boolean.valueOf(clearCalled).hashCode(); + } + + @Override + public void copyTo(AttributeImpl target) { + ((CheckClearAttributesAttributeImpl) target).clear(); + } + } + public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { assertNotNull(output); - assertTrue("has TermAttribute", ts.hasAttribute(TermAttribute.class)); + CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class); + + assertTrue("has no TermAttribute", ts.hasAttribute(TermAttribute.class)); TermAttribute termAtt = ts.getAttribute(TermAttribute.class); OffsetAttribute offsetAtt = null; if (startOffsets != null || endOffsets != null) { - assertTrue("has OffsetAttribute", ts.hasAttribute(OffsetAttribute.class)); + assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class)); offsetAtt = ts.getAttribute(OffsetAttribute.class); } TypeAttribute typeAtt = null; if (types != null) { - assertTrue("has TypeAttribute", ts.hasAttribute(TypeAttribute.class)); + assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class)); typeAtt = ts.getAttribute(TypeAttribute.class); } PositionIncrementAttribute posIncrAtt = null; if (posIncrements != null) { - assertTrue("has PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class)); + assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class)); posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class); } @@ -70,7 +113,10 @@ if (typeAtt 
!= null) typeAtt.setType("bogusType"); if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657); - assertTrue("token "+i+" exists", ts.incrementToken()); + checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttributes() before + assertTrue("token "+i+" does not exist", ts.incrementToken()); + assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled()); + assertEquals("term "+i, output[i], termAtt.term()); if (startOffsets != null) assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset()); Index: src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (revision 899380) +++ src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java (working copy) @@ -49,6 +49,7 @@ if (index == tokens.length) { return false; } else { + clearAttributes(); termAtt.setTermBuffer(tokens[index++]); offsetAtt.setOffset(0,0); return true; Index: src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java =================================================================== --- src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (revision 899380) +++ src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java (working copy) @@ -79,29 +79,14 @@ final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString()))); final TokenStream sink1 = source.newSinkTokenStream(); final TokenStream sink2 = source.newSinkTokenStream(theFilter); - int i = 0; - TermAttribute termAtt = source.getAttribute(TermAttribute.class); - while (source.incrementToken()) { - assertEquals(tokens1[i], termAtt.term()); - i++; - } - assertEquals(tokens1.length, i); - i = 0; - termAtt = sink1.getAttribute(TermAttribute.class); - while (sink1.incrementToken()) { - assertEquals(tokens1[i], termAtt.term()); - 
i++; - } - assertEquals(tokens1.length, i); + source.addAttribute(CheckClearAttributesAttribute.class); + sink1.addAttribute(CheckClearAttributesAttribute.class); + sink2.addAttribute(CheckClearAttributesAttribute.class); - i = 0; - termAtt = sink2.getAttribute(TermAttribute.class); - while (sink2.incrementToken()) { - assertTrue(termAtt.term().equalsIgnoreCase("The")); - i++; - } - assertEquals("there should be two times 'the' in the stream", 2, i); + assertTokenStreamContents(source, tokens1); + assertTokenStreamContents(sink1, tokens1); + assertTokenStreamContents(sink2, new String[]{"The", "the"}); } public void testMultipleSources() throws Exception { @@ -109,50 +94,28 @@ final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter); final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter); final TokenStream source1 = new CachingTokenFilter(tee1); + + tee1.addAttribute(CheckClearAttributesAttribute.class); + dogDetector.addAttribute(CheckClearAttributesAttribute.class); + theDetector.addAttribute(CheckClearAttributesAttribute.class); final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString()))); tee2.addSinkTokenStream(dogDetector); tee2.addSinkTokenStream(theDetector); final TokenStream source2 = tee2; - int i = 0; - TermAttribute termAtt = source1.getAttribute(TermAttribute.class); - while (source1.incrementToken()) { - assertEquals(tokens1[i], termAtt.term()); - i++; - } - assertEquals(tokens1.length, i); - i = 0; - termAtt = source2.getAttribute(TermAttribute.class); - while (source2.incrementToken()) { - assertEquals(tokens2[i], termAtt.term()); - i++; - } - assertEquals(tokens2.length, i); - i = 0; - termAtt = theDetector.getAttribute(TermAttribute.class); - while (theDetector.incrementToken()) { - assertTrue("'" + termAtt.term() + "' is not equal to 'The'", termAtt.term().equalsIgnoreCase("The")); - i++; - } - assertEquals("there must be 
4 times 'The' in the stream", 4, i); - i = 0; - termAtt = dogDetector.getAttribute(TermAttribute.class); - while (dogDetector.incrementToken()) { - assertTrue("'" + termAtt.term() + "' is not equal to 'Dogs'", termAtt.term().equalsIgnoreCase("Dogs")); - i++; - } - assertEquals("there must be 2 times 'Dog' in the stream", 2, i); + assertTokenStreamContents(source1, tokens1); + assertTokenStreamContents(source2, tokens2); + + assertTokenStreamContents(theDetector, new String[]{"The", "the", "The", "the"}); + assertTokenStreamContents(dogDetector, new String[]{"Dogs", "Dogs"}); source1.reset(); TokenStream lowerCasing = new LowerCaseFilter(Version.LUCENE_CURRENT, source1); - i = 0; - termAtt = lowerCasing.getAttribute(TermAttribute.class); - while (lowerCasing.incrementToken()) { - assertEquals(tokens1[i].toLowerCase(), termAtt.term()); - i++; - } - assertEquals(i, tokens1.length); + String[] lowerCaseTokens = new String[tokens1.length]; + for (int i = 0; i < tokens1.length; i++) + lowerCaseTokens[i] = tokens1[i].toLowerCase(); + assertTokenStreamContents(lowerCasing, lowerCaseTokens); } /** Index: src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 899380) +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) @@ -221,6 +221,7 @@ if (index == tokens.length) { return false; } else { + clearAttributes(); termAtt.setTermBuffer(tokens[index++]); return true; } Index: src/test/org/apache/lucene/index/TestPayloads.java =================================================================== --- src/test/org/apache/lucene/index/TestPayloads.java (revision 899380) +++ src/test/org/apache/lucene/index/TestPayloads.java (working copy) @@ -537,6 +537,7 @@ public boolean incrementToken() throws IOException { if (!first) return false; first = false; + clearAttributes(); termAtt.setTermBuffer(term); 
payloadAtt.setPayload(new Payload(payload)); return true; Index: src/test/org/apache/lucene/index/TestTermdocPerf.java =================================================================== --- src/test/org/apache/lucene/index/TestTermdocPerf.java (revision 899380) +++ src/test/org/apache/lucene/index/TestTermdocPerf.java (working copy) @@ -48,6 +48,7 @@ public boolean incrementToken() throws IOException { num--; if (num >= 0) { + clearAttributes(); termAtt.setTermBuffer(value); return true; } Index: src/test/org/apache/lucene/index/TestTermVectorsReader.java =================================================================== --- src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 899380) +++ src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy) @@ -138,6 +138,7 @@ return false; else { final TestToken testToken = tokens[tokenUpto++]; + clearAttributes(); termAtt.setTermBuffer(testToken.text); offsetAtt.setOffset(testToken.startOffset, testToken.endOffset); if (tokenUpto > 1) { Index: src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 899380) +++ src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -102,6 +102,7 @@ public boolean incrementToken() throws IOException { if (inPhrase) { inPhrase = false; + clearAttributes(); termAtt.setTermBuffer("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; Index: src/test/org/apache/lucene/search/TestPositionIncrement.java =================================================================== --- src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 899380) +++ src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy) @@ -77,6 +77,7 @@ public boolean incrementToken() { if (i == TOKENS.length) return false; + clearAttributes(); termAtt.setTermBuffer(TOKENS[i]); 
offsetAtt.setOffset(i,i); posIncrAtt.setPositionIncrement(INCREMENTS[i]); Index: src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 899380) +++ src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -273,6 +273,7 @@ if (done) return false; else { + clearAttributes(); done = true; if (count == 1) { termAtt.termBuffer()[0] = buffer[0];