Index: src/java/org/apache/lucene/analysis/TokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/TokenStream.java (revision 816505) +++ src/java/org/apache/lucene/analysis/TokenStream.java (working copy) @@ -29,6 +29,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Payload; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; @@ -377,10 +378,7 @@ return incrementToken() ? tokenWrapper.delegate : null; } else { assert supportedMethods.hasNext; - final Token token = next(); - if (token == null) return null; - tokenWrapper.delegate = token; - return token; + return next(); } } @@ -396,15 +394,24 @@ if (tokenWrapper == null) throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API."); + final Token nextToken; if (supportedMethods.hasIncrementToken) { - return incrementToken() ? ((Token) tokenWrapper.delegate.clone()) : null; + final Token savedDelegate = tokenWrapper.delegate; + tokenWrapper.delegate = new Token(); + nextToken = incrementToken() ? 
tokenWrapper.delegate : null; + tokenWrapper.delegate = savedDelegate; } else { assert supportedMethods.hasReusableNext; - final Token token = next(tokenWrapper.delegate); - if (token == null) return null; - tokenWrapper.delegate = token; - return (Token) token.clone(); + nextToken = next(new Token()); } + + if (nextToken != null) { + Payload p = nextToken.getPayload(); + if (p != null) { + nextToken.setPayload((Payload) p.clone()); + } + } + return nextToken; } /** Index: src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java =================================================================== --- src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java (revision 816505) +++ src/test/org/apache/lucene/analysis/TestTokenStreamBWComp.java (working copy) @@ -27,8 +27,9 @@ /** This class tests some special cases of backwards compatibility when using the new TokenStream API with old analyzers */ public class TestTokenStreamBWComp extends LuceneTestCase { - private final String doc = "This is the new TokenStream api"; - private final String[] stopwords = new String[] {"is", "the", "this"}; + private static final String doc = "This is the new TokenStream api"; + private static final String[] stopwords = new String[] {"is", "the", "this"}; + private static final String[] results = new String[] {"new", "tokenstream", "api"}; public static class POSToken extends Token { public static final int PROPERNOUN = 1; @@ -190,14 +191,17 @@ PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class); TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class); + int i=0; while (stream.incrementToken()) { String term = termAtt.term(); Payload p = payloadAtt.getPayload(); if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) { - assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term)); + assertEquals("only TokenStream is a proper noun", 
"tokenstream", term); } else { assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term)); } + assertEquals(results[i], term); + i++; } } @@ -205,14 +209,17 @@ stream.reset(); Token reusableToken = new Token(); + int i=0; while ((reusableToken = stream.next(reusableToken)) != null) { String term = reusableToken.term(); Payload p = reusableToken.getPayload(); if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) { - assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term)); + assertEquals("only TokenStream is a proper noun", "tokenstream", term); } else { assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term)); } + assertEquals(results[i], term); + i++; } } @@ -220,14 +227,17 @@ stream.reset(); Token token; + int i=0; while ((token = stream.next()) != null) { String term = token.term(); Payload p = token.getPayload(); if (p != null && p.getData().length == 1 && p.getData()[0] == PartOfSpeechAnnotatingFilter.PROPER_NOUN_ANNOTATION) { - assertTrue("only TokenStream is a proper noun", "tokenstream".equals(term)); + assertEquals("only TokenStream is a proper noun", "tokenstream", term); } else { assertFalse("all other tokens (if this test fails, the special POSToken subclass is not correctly passed through the chain)", "tokenstream".equals(term)); } + assertEquals(results[i], term); + i++; } } @@ -245,7 +255,7 @@ while (stream.incrementToken()); fail("If only the new API is allowed, this should fail with an UOE"); } catch (UnsupportedOperationException uoe) { - assertTrue((PartOfSpeechTaggingFilter.class.getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.").equals(uoe.getMessage())); + assertEquals((PartOfSpeechTaggingFilter.class.getName()+" does not implement 
incrementToken() which is needed for onlyUseNewAPI."),uoe.getMessage()); } // this should pass, as all core token streams support the new API @@ -255,17 +265,17 @@ while (stream.incrementToken()); // Test, if all attributes are implemented by their implementation, not Token/TokenWrapper - assertTrue("TermAttribute is implemented by TermAttributeImpl", + assertTrue("TermAttribute is not implemented by TermAttributeImpl", stream.addAttribute(TermAttribute.class) instanceof TermAttributeImpl); - assertTrue("OffsetAttribute is implemented by OffsetAttributeImpl", + assertTrue("OffsetAttribute is not implemented by OffsetAttributeImpl", stream.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl); - assertTrue("FlagsAttribute is implemented by FlagsAttributeImpl", + assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl", stream.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl); - assertTrue("PayloadAttribute is implemented by PayloadAttributeImpl", + assertTrue("PayloadAttribute is not implemented by PayloadAttributeImpl", stream.addAttribute(PayloadAttribute.class) instanceof PayloadAttributeImpl); - assertTrue("PositionIncrementAttribute is implemented by PositionIncrementAttributeImpl", + assertTrue("PositionIncrementAttribute is not implemented by PositionIncrementAttributeImpl", stream.addAttribute(PositionIncrementAttribute.class) instanceof PositionIncrementAttributeImpl); - assertTrue("TypeAttribute is implemented by TypeAttributeImpl", + assertTrue("TypeAttribute is not implemented by TypeAttributeImpl", stream.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl); // try to call old API, this should fail @@ -275,14 +285,14 @@ while ((reusableToken = stream.next(reusableToken)) != null); fail("If only the new API is allowed, this should fail with an UOE"); } catch (UnsupportedOperationException uoe) { - assertTrue("This TokenStream only supports the new Attributes API.".equals(uoe.getMessage())); + 
assertEquals("This TokenStream only supports the new Attributes API.", uoe.getMessage()); } try { stream.reset(); while (stream.next() != null); fail("If only the new API is allowed, this should fail with an UOE"); } catch (UnsupportedOperationException uoe) { - assertTrue("This TokenStream only supports the new Attributes API.".equals(uoe.getMessage())); + assertEquals("This TokenStream only supports the new Attributes API.", uoe.getMessage()); } // Test if the wrapper API (onlyUseNewAPI==false) uses TokenWrapper @@ -292,17 +302,17 @@ // itsself. TokenStream.setOnlyUseNewAPI(false); stream = new WhitespaceTokenizer(new StringReader(doc)); - assertTrue("TermAttribute is implemented by TokenWrapper", + assertTrue("TermAttribute is not implemented by TokenWrapper", stream.addAttribute(TermAttribute.class) instanceof TokenWrapper); - assertTrue("OffsetAttribute is implemented by TokenWrapper", + assertTrue("OffsetAttribute is not implemented by TokenWrapper", stream.addAttribute(OffsetAttribute.class) instanceof TokenWrapper); - assertTrue("FlagsAttribute is implemented by TokenWrapper", + assertTrue("FlagsAttribute is not implemented by TokenWrapper", stream.addAttribute(FlagsAttribute.class) instanceof TokenWrapper); - assertTrue("PayloadAttribute is implemented by TokenWrapper", + assertTrue("PayloadAttribute is not implemented by TokenWrapper", stream.addAttribute(PayloadAttribute.class) instanceof TokenWrapper); - assertTrue("PositionIncrementAttribute is implemented by TokenWrapper", + assertTrue("PositionIncrementAttribute is not implemented by TokenWrapper", stream.addAttribute(PositionIncrementAttribute.class) instanceof TokenWrapper); - assertTrue("TypeAttribute is implemented by TokenWrapper", + assertTrue("TypeAttribute is not implemented by TokenWrapper", stream.addAttribute(TypeAttribute.class) instanceof TokenWrapper); } finally { @@ -321,8 +331,29 @@ while (stream.incrementToken()); fail("One TokenFilter does not override any of the required methods, 
so it should fail."); } catch (UnsupportedOperationException uoe) { - assertTrue(uoe.getMessage().endsWith("does not implement any of incrementToken(), next(Token), next().")); + assertTrue("invalid UOE message", uoe.getMessage().endsWith("does not implement any of incrementToken(), next(Token), next().")); } } + public void testMixedOldApiConsumer() throws Exception { + // WhitespaceTokenizer is using incrementToken() API: + TokenStream stream = new WhitespaceTokenizer(new StringReader("foo bar moo maeh")); + + Token foo = new Token(); + foo = stream.next(foo); + Token bar = stream.next(); + assertEquals("foo", foo.term()); + assertEquals("bar", bar.term()); + + Token moo = stream.next(foo); + assertEquals("moo", moo.term()); + assertEquals("private 'bar' term should still be valid", "bar", bar.term()); + + // and now we also use incrementToken()... (very bad, but should work) + TermAttribute termAtt = (TermAttribute) stream.getAttribute(TermAttribute.class); + assertTrue(stream.incrementToken()); + assertEquals("maeh", termAtt.term()); + assertEquals("private 'bar' term should still be valid", "bar", bar.term()); + } + }