Index: src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
===================================================================
--- src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(revision 899380)
+++ src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java	(working copy)
@@ -21,6 +21,8 @@
 import java.io.IOException;
 
 import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.LuceneTestCase;
 
 /**
@@ -38,8 +40,49 @@
 
   // some helpers to test Analyzers and TokenStreams:
 
+  public static interface CheckClearAttributesAttribute extends Attribute {
+    boolean getAndResetClearCalled();
+  }
+
+  public static final class CheckClearAttributesAttributeImpl extends AttributeImpl implements CheckClearAttributesAttribute {
+    private boolean clearCalled = false;
+
+    public boolean getAndResetClearCalled() {
+      try {
+        return clearCalled;
+      } finally {
+        clearCalled = false;
+      }
+    }
+
+    @Override
+    public void clear() {
+      clearCalled = true;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      return (
+        other instanceof CheckClearAttributesAttributeImpl &&
+        ((CheckClearAttributesAttributeImpl) other).clearCalled == this.clearCalled
+      );
+    }
+
+    @Override
+    public int hashCode() {
+      return 76137213 ^ Boolean.valueOf(clearCalled).hashCode();
+    }
+
+    @Override
+    public void copyTo(AttributeImpl target) {
+      ((CheckClearAttributesAttributeImpl) target).clear();
+    }
+  }
+
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
     assertNotNull(output);
+    CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
+
     assertTrue("has TermAttribute", ts.hasAttribute(TermAttribute.class));
     TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
 
@@ -62,15 +105,18 @@
     }
     
     ts.reset();
+    checkClearAtt.getAndResetClearCalled(); // reset it
     for (int i = 0; i < output.length; i++) {
       // extra safety to enforce, that the state is not preserved and also assign bogus values
-      ts.clearAttributes();
       termAtt.setTermBuffer("bogusTerm");
       if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
       if (typeAtt != null) typeAtt.setType("bogusType");
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       
       assertTrue("token "+i+" exists", ts.incrementToken());
+
+      assertTrue("clearAttributes() was not correctly called in TokenStreamChain", checkClearAtt.getAndResetClearCalled());
+
       assertEquals("term "+i, output[i], termAtt.term());
       if (startOffsets != null)
         assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
Index: src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java
===================================================================
--- src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java	(revision 899380)
+++ src/test/org/apache/lucene/analysis/TestTeeSinkTokenFilter.java	(working copy)
@@ -79,25 +79,19 @@
     final TeeSinkTokenFilter source = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer1.toString())));
     final TokenStream sink1 = source.newSinkTokenStream();
     final TokenStream sink2 = source.newSinkTokenStream(theFilter);
-    int i = 0;
-    TermAttribute termAtt = source.getAttribute(TermAttribute.class);
-    while (source.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
-    i = 0;
-    termAtt = sink1.getAttribute(TermAttribute.class);
-    while (sink1.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
+    source.addAttribute(CheckClearAttributesAttribute.class);
+    sink1.addAttribute(CheckClearAttributesAttribute.class);
+    CheckClearAttributesAttribute checkClearAtt = sink2.addAttribute(CheckClearAttributesAttribute.class);
 
-    i = 0;
-    termAtt = sink2.getAttribute(TermAttribute.class);
+    assertTokenStreamContents(source, tokens1);
+    assertTokenStreamContents(sink1, tokens1);
+
+    int i = 0;
+    TermAttribute termAtt = sink2.getAttribute(TermAttribute.class);
+    checkClearAtt.getAndResetClearCalled(); // reset it
     while (sink2.incrementToken()) {
+      assertTrue("clearAttributes() was not correctly called in TokenStreamChain", checkClearAtt.getAndResetClearCalled());
       assertTrue(termAtt.term().equalsIgnoreCase("The"));
       i++;
     }
@@ -109,36 +103,35 @@
     final TeeSinkTokenFilter.SinkTokenStream dogDetector = tee1.newSinkTokenStream(dogFilter);
     final TeeSinkTokenFilter.SinkTokenStream theDetector = tee1.newSinkTokenStream(theFilter);
     final TokenStream source1 = new CachingTokenFilter(tee1);
+
+    tee1.addAttribute(CheckClearAttributesAttribute.class);
+    dogDetector.addAttribute(CheckClearAttributesAttribute.class);
+    theDetector.addAttribute(CheckClearAttributesAttribute.class);
 
     final TeeSinkTokenFilter tee2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(new StringReader(buffer2.toString())));
     tee2.addSinkTokenStream(dogDetector);
     tee2.addSinkTokenStream(theDetector);
     final TokenStream source2 = tee2;
 
+    assertTokenStreamContents(source1, tokens1);
+    assertTokenStreamContents(source2, tokens2);
+
     int i = 0;
-    TermAttribute termAtt = source1.getAttribute(TermAttribute.class);
-    while (source1.incrementToken()) {
-      assertEquals(tokens1[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens1.length, i);
-    i = 0;
-    termAtt = source2.getAttribute(TermAttribute.class);
-    while (source2.incrementToken()) {
-      assertEquals(tokens2[i], termAtt.term());
-      i++;
-    }
-    assertEquals(tokens2.length, i);
-    i = 0;
-    termAtt = theDetector.getAttribute(TermAttribute.class);
+    CheckClearAttributesAttribute checkClearAtt = theDetector.getAttribute(CheckClearAttributesAttribute.class);
+    TermAttribute termAtt = theDetector.getAttribute(TermAttribute.class);
+    checkClearAtt.getAndResetClearCalled(); // reset it
     while (theDetector.incrementToken()) {
+      assertTrue("clearAttributes() was not correctly called in TokenStreamChain", checkClearAtt.getAndResetClearCalled());
       assertTrue("'" + termAtt.term() + "' is not equal to 'The'", termAtt.term().equalsIgnoreCase("The"));
       i++;
     }
     assertEquals("there must be 4 times 'The' in the stream", 4, i);
     i = 0;
+    checkClearAtt = dogDetector.getAttribute(CheckClearAttributesAttribute.class);
     termAtt = dogDetector.getAttribute(TermAttribute.class);
+    checkClearAtt.getAndResetClearCalled(); // reset it
     while (dogDetector.incrementToken()) {
+      assertTrue("clearAttributes() was not correctly called in TokenStreamChain", checkClearAtt.getAndResetClearCalled());
       assertTrue("'" + termAtt.term() + "' is not equal to 'Dogs'", termAtt.term().equalsIgnoreCase("Dogs"));
       i++;
     }
@@ -147,8 +140,11 @@
     source1.reset();
     TokenStream lowerCasing = new LowerCaseFilter(Version.LUCENE_CURRENT, source1);
     i = 0;
+    checkClearAtt = lowerCasing.getAttribute(CheckClearAttributesAttribute.class);
     termAtt = lowerCasing.getAttribute(TermAttribute.class);
+    checkClearAtt.getAndResetClearCalled(); // reset it
    while (lowerCasing.incrementToken()) {
+      assertTrue("clearAttributes() was not correctly called in TokenStreamChain", checkClearAtt.getAndResetClearCalled());
       assertEquals(tokens1[i].toLowerCase(), termAtt.term());
       i++;
     }