Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java	(revision 1102290)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java	(revision )
@@ -20,10 +20,7 @@
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 
 public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
 
@@ -52,15 +49,14 @@
     assertTokenStreamContents(filter, new String[] {"short", "toolong",
         "evenmuchlongertext"});
 
-    // TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
-    checkOneTermReuse(new Analyzer() {
+    checkOneTermReuse(new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
         MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
         tokenizer.setEnableChecks(false);
-        return new OffsetLimitTokenFilter(tokenizer, 10);
+        return new TokenStreamComponents(tokenizer, new OffsetLimitTokenFilter(tokenizer, 10));
       }
     }, "llenges", "llenges");
   }
-}
\ No newline at end of file
+}
Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java	(revision 1096339)
+++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java	(revision )
@@ -20,9 +20,7 @@
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 import com.ibm.icu.text.Normalizer2;
@@ -31,11 +29,11 @@
  * Tests the ICUNormalizer2Filter
 */
 public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
+  Analyzer a = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ICUNormalizer2Filter(
-          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
     }
   };
 
@@ -61,13 +59,14 @@
   }
 
   public void testAlternate() throws IOException {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new ICUNormalizer2Filter(
-            new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
+            tokenizer,
            /* specify nfc with decompose to get nfd */
-            Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE));
+            Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
      }
    };
Index: lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java	(revision )
@@ -20,8 +20,7 @@
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -64,10 +63,10 @@
   }
 }
 
-final class BugReproAnalyzer extends Analyzer{
+final class BugReproAnalyzer extends ReusableAnalyzerBase {
   @Override
-  public TokenStream tokenStream(String arg0, Reader arg1) {
-    return new BugReproAnalyzerTokenizer();
+  public TokenStreamComponents createComponents(String arg0, Reader arg1) {
+    return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
   }
 }
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java	(revision 1161488)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java	(revision )
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -41,7 +42,7 @@
     }
   }
 
-  private static class CannedAnalyzer extends Analyzer {
+  private static class CannedAnalyzer extends ReusableAnalyzerBase {
     private final TokenAndPos[] tokens;
 
     public CannedAnalyzer(TokenAndPos[] tokens) {
@@ -49,8 +50,8 @@
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new CannedTokenizer(tokens);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new CannedTokenizer(tokens));
     }
   }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java	(revision 1104519)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java	(revision )
@@ -19,11 +19,8 @@
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.util.CharArraySet;
@@ -87,11 +84,12 @@
    * @return Map
    */
   public void testCommonGramsQueryFilter() throws Exception {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
+      public TokenStreamComponents createComponents(String field, Reader in) {
+        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
+            tokenizer, commonWords)));
       }
     };
@@ -156,11 +154,12 @@
   }
 
   public void testCommonGramsFilter() throws Exception {
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String field, Reader in) {
-        return new CommonGramsFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
+      public TokenStreamComponents createComponents(String field, Reader in) {
+        Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT,
+            tokenizer, commonWords));
      }
    };
Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java	(revision 1158819)
+++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java	(revision )
@@ -20,9 +20,7 @@
 
 import com.ibm.icu.text.Collator;
 
-import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.BytesRef;
 
@@ -46,7 +44,7 @@
     (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
-  public final class TestAnalyzer extends Analyzer {
+  public final class TestAnalyzer extends ReusableAnalyzerBase {
     private Collator _collator;
 
     TestAnalyzer(Collator collator) {
@@ -54,10 +52,9 @@
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new KeywordTokenizer(reader);
-      result = new ICUCollationKeyFilter(result, _collator);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
     }
   }
Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java	(revision )
@@ -137,7 +137,7 @@
     super.tearDown();
   }
 
-  private class MyTokenStream extends TokenStream {
+  private class MyTokenStream extends Tokenizer {
     private int tokenUpto;
 
     private final CharTermAttribute termAtt;
@@ -175,10 +175,10 @@
     }
   }
 
-  private class MyAnalyzer extends Analyzer {
+  private class MyAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new MyTokenStream();
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new MyTokenStream());
    }
  }
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java	(revision 1162347)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java	(revision )
@@ -128,12 +128,13 @@
     }
   }
 
-  public static final class QPTestAnalyzer extends Analyzer {
+  public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
 
     /** Filters MockTokenizer with StopFilter. */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
     }
   }
@@ -344,10 +345,10 @@
     }
   }
 
-  private class SimpleCJKAnalyzer extends Analyzer {
+  private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimpleCJKTokenizer(reader);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
     }
   }
@@ -1241,10 +1242,10 @@
     }
   }
 
-  private class CannedAnalyzer extends Analyzer {
+  private class CannedAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String ignored, Reader alsoIgnored) {
-      return new CannedTokenStream();
+    public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
+      return new TokenStreamComponents(new CannedTokenStream());
    }
  }
Index: lucene/src/test/org/apache/lucene/TestAssertions.java
===================================================================
--- lucene/src/test/org/apache/lucene/TestAssertions.java	(revision 1065304)
+++ lucene/src/test/org/apache/lucene/TestAssertions.java	(revision )
@@ -19,6 +19,7 @@
 
 import java.io.Reader;
 
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -34,33 +35,37 @@
     }
   }
 
-  static class TestAnalyzer1 extends Analyzer {
+  static class TestAnalyzer1 extends ReusableAnalyzerBase {
+
     @Override
-    public final TokenStream tokenStream(String s, Reader r) { return null; }
-    @Override
-    public final TokenStream reusableTokenStream(String s, Reader r) { return null; }
+    protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+      return null;
-  }
+    }
+  }
 
-  static final class TestAnalyzer2 extends Analyzer {
+  static final class TestAnalyzer2 extends ReusableAnalyzerBase {
+
    @Override
-    public TokenStream tokenStream(String s, Reader r) { return null; }
-    @Override
-    public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+    protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+      return null;
-  }
+    }
+  }
 
-  static class TestAnalyzer3 extends Analyzer {
+  static class TestAnalyzer3 extends ReusableAnalyzerBase {
+
    @Override
-    public TokenStream tokenStream(String s, Reader r) { return null; }
-    @Override
-    public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+    protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+      return null;
-  }
+    }
+  }
 
-  static class TestAnalyzer4 extends Analyzer {
+  static class TestAnalyzer4 extends ReusableAnalyzerBase {
+
    @Override
-    public final TokenStream tokenStream(String s, Reader r) { return null; }
-    @Override
-    public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+    protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+      return null;
-  }
+    }
+  }
 
   static class TestTokenStream1 extends TokenStream {
     @Override
Index: lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java	(revision 1133599)
+++ lucene/src/test-framework/org/apache/lucene/analysis/MockAnalyzer.java	(revision )
@@ -42,7 +42,7 @@
  *
  * @see MockTokenizer
  */
-public final class MockAnalyzer extends Analyzer {
+public final class MockAnalyzer extends ReusableAnalyzerBase {
   private final CharacterRunAutomaton runAutomaton;
   private final boolean lowerCase;
   private final CharacterRunAutomaton filter;
@@ -62,6 +62,7 @@
    * @param enablePositionIncrements true if position increments should reflect filtered terms.
    */
   public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+    super(new PerFieldReuseStrategy());
     this.random = random;
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
@@ -88,43 +89,13 @@
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
     MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
     tokenizer.setEnableChecks(enableChecks);
     TokenFilter filt = new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
-    filt = maybePayload(filt, fieldName);
-    return filt;
+    return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
   }
-  
+  
-  private class SavedStreams {
-    MockTokenizer tokenizer;
-    TokenFilter filter;
-  }
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    @SuppressWarnings("unchecked") Map<String,SavedStreams> map = (Map) getPreviousTokenStream();
-    if (map == null) {
-      map = new HashMap<String,SavedStreams>();
-      setPreviousTokenStream(map);
-    }
-    
-    SavedStreams saved = map.get(fieldName);
-    if (saved == null) {
-      saved = new SavedStreams();
-      saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
-      saved.tokenizer.setEnableChecks(enableChecks);
-      saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
-      saved.filter = maybePayload(saved.filter, fieldName);
-      map.put(fieldName, saved);
-      return saved.filter;
-    } else {
-      saved.tokenizer.reset(reader);
-      return saved.filter;
-    }
-  }
-
   private synchronized TokenFilter maybePayload(TokenFilter stream, String fieldName) {
     Integer val = previousMappings.get(fieldName);
     if (val == null) {
Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java	(revision )
@@ -190,7 +190,7 @@
     assertFalse("queries with different inclusive are not equal", query.equals(other));
   }
 
-  private static class SingleCharAnalyzer extends Analyzer {
+  private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
 
     private static class SingleCharTokenizer extends Tokenizer {
       char[] buffer = new char[1];
@@ -225,21 +225,10 @@
     }
 
     @Override
-    public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-      Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
-      if (tokenizer == null) {
-        tokenizer = new SingleCharTokenizer(reader);
-        setPreviousTokenStream(tokenizer);
-      } else
-        tokenizer.reset(reader);
-      return tokenizer;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new SingleCharTokenizer(reader));
    }
-
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SingleCharTokenizer(reader);
-    }
+  }
-  }
 
   private void initializeIndex(String[] values) throws IOException {
     initializeIndex(values, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
Index: modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java
===================================================================
--- modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java	(revision 1162347)
+++ modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java	(revision )
@@ -5,10 +5,7 @@
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
@@ -140,10 +137,10 @@
     DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
         new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
     // this test requires that no payloads ever be randomly present!
-    final Analyzer noPayloadsAnalyzer = new Analyzer() {
+    final Analyzer noPayloadsAnalyzer = new ReusableAnalyzerBase() {
       @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false));
      }
    };
     // NOTE: test is wired to LogMP... because test relies on certain docids having payloads
Index: lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java	(revision )
@@ -55,12 +55,11 @@
   private static byte[] payload2 = new byte[]{2};
   private static byte[] payload4 = new byte[]{4};
 
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
-      result = new PayloadFilter(result, fieldName);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
    }
  }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java	(revision 948195)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java	(revision )
@@ -18,6 +18,7 @@
  */
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.Arrays;
 import java.util.regex.Pattern;
 
@@ -128,7 +129,7 @@
     assertTokenStreamContents(ts, expected);
 
     // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
-    TokenStream ts2 = analyzer.tokenStream("dummy", document);
+    TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
     assertTokenStreamContents(ts2, expected);
   }
 }
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java	(revision 1161986)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java	(revision )
@@ -27,6 +27,7 @@
 import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -66,7 +67,7 @@
  * @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
 */
 @Deprecated
-public final class PatternAnalyzer extends Analyzer {
+public final class PatternAnalyzer extends ReusableAnalyzerBase {
 
   /** "\\W+"; Divides text at non-letters (NOT Character.isLetter(c)) */
   public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
@@ -187,25 +188,21 @@
    *            the string to tokenize
    * @return a new token stream
    */
-  public TokenStream tokenStream(String fieldName, String text) {
+  public TokenStreamComponents createComponents(String fieldName, String text) {
    // Ideally the Analyzer superclass should have a method with the same signature, 
    // with a default impl that simply delegates to the StringReader flavour. 
    if (text == null) throw new IllegalArgumentException("text must not be null");
    
-    TokenStream stream;
    if (pattern == NON_WORD_PATTERN) { // fast path
-      stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
+      return new TokenStreamComponents(new FastStringTokenizer(text, true, toLowerCase, stopWords));
+    } else if (pattern == WHITESPACE_PATTERN) { // fast path
+      return new TokenStreamComponents(new FastStringTokenizer(text, false, toLowerCase, stopWords));
    }
-    else if (pattern == WHITESPACE_PATTERN) { // fast path
-      stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
-    }
-    else {
-      stream = new PatternTokenizer(text, pattern, toLowerCase);
-      if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
-    }
-    
+    
-    return stream;
+    Tokenizer tokenizer = new PatternTokenizer(text, pattern, toLowerCase);
+    TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+    return new TokenStreamComponents(tokenizer, result);
   }
 
   /**
@@ -220,10 +217,10 @@
    * @return a new token stream
    */
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
     try {
       String text = toString(reader);
-      return tokenStream(fieldName, text);
+      return createComponents(fieldName, text);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
Index: lucene/src/test/org/apache/lucene/index/TestPayloads.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestPayloads.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestPayloads.java	(revision )
@@ -25,11 +25,7 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
@@ -105,12 +101,12 @@
     // so this field is used to check if the DocumentWriter correctly enables the payloads bit
     // even if only some term positions have payloads
     d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
-    d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
+    d.add(newField("f2", "This field has payloads in all docs NO PAYLOAD", TextField.TYPE_UNSTORED));
     // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads 
     // enabled in only some documents
     d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
     // only add payload data for field f2
-    analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1);
+    analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
     writer.addDocument(d);
     // flush
     writer.close();
Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java	(revision )
@@ -21,10 +21,7 @@
 import java.io.Reader;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
@@ -114,11 +111,12 @@
     assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
   }
 
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
     private final AtomicInteger payloadCount = new AtomicInteger(-1);
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new PayloadFilter(payloadCount, new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(tokenizer, new PayloadFilter(payloadCount, tokenizer));
    }
  }
Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java	(revision 1096339)
+++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java	(revision )
@@ -20,20 +20,18 @@
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 /**
  * Tests ICUFoldingFilter
 */
 public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
-  Analyzer a = new Analyzer() {
+  Analyzer a = new ReusableAnalyzerBase() {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ICUFoldingFilter(
-          new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
     }
   };
   public void testDefaults() throws IOException {
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java	(revision )
@@ -27,11 +27,7 @@
 import java.util.List;
 import java.util.Random;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -390,12 +386,12 @@
     doc.add(newField("field", "a field", TextField.TYPE_STORED));
     w.addDocument(doc);
 
-    Analyzer analyzer = new Analyzer() {
+    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new CrashingFilter(fieldName, tokenizer);
+        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
      }
    };
@@ -458,13 +454,13 @@
   // LUCENE-1072
   public void testExceptionFromTokenStream() throws IOException {
     Directory dir = newDirectory();
-    IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
+    IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
 
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new TokenFilter(tokenizer) {
+        return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
          private int count = 0;
 
          @Override
@@ -480,7 +476,7 @@
            super.reset();
            this.count = 0;
          }
-        };
+        });
      }
    });
@@ -595,12 +591,12 @@
   }
 
   public void testDocumentsWriterExceptions() throws IOException {
-    Analyzer analyzer = new Analyzer() {
+    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new CrashingFilter(fieldName, tokenizer);
+        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
      }
    };
@@ -691,12 +687,12 @@
   }
 
   public void testDocumentsWriterExceptionThreads() throws Exception {
-    Analyzer analyzer = new Analyzer() {
+    Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
-        return new CrashingFilter(fieldName, tokenizer);
+        return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
      }
    };
Index: lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java	(revision )
@@ -20,10 +20,7 @@
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.codecs.CodecProvider;
@@ -71,10 +68,10 @@
     private void createIndex(int numHits) throws IOException {
         int numDocs = 500;
 
-        final Analyzer analyzer = new Analyzer() {
+        final Analyzer analyzer = new ReusableAnalyzerBase() {
           @Override
-          public TokenStream tokenStream(String fieldName, Reader reader) {
-            return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+          public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+            return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
          }
        };
        Directory directory = new SeekCountingDirectory(new RAMDirectory());
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java	(revision 948195)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java	(revision )
@@ -21,9 +21,7 @@
 import java.io.Reader;
 import java.io.StringReader;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.Version;
@@ -66,10 +64,10 @@
    * Analyzer that just uses ChineseTokenizer, not ChineseFilter.
    * convenience to show the behavior of the tokenizer
    */
-  private class JustChineseTokenizerAnalyzer extends Analyzer {
+  private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ChineseTokenizer(reader);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new ChineseTokenizer(reader));
    }
  }
@@ -77,10 +75,11 @@
    * Analyzer that just uses ChineseFilter, not ChineseTokenizer.
    * convenience to show the behavior of the filter.
    */
-  private class JustChineseFilterAnalyzer extends Analyzer {
+  private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new ChineseFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
    }
  }
Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java	(revision 1158819)
+++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java	(revision )
@@ -18,9 +18,7 @@
  */
 
-import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.util.BytesRef;
 
@@ -54,7 +52,7 @@
     (collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
 
-  public final class TestAnalyzer extends Analyzer {
+  public final class TestAnalyzer extends ReusableAnalyzerBase {
     private Collator _collator;
 
     TestAnalyzer(Collator collator) {
@@ -62,10 +60,9 @@
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new KeywordTokenizer(reader);
-      result = new CollationKeyFilter(result, _collator);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new KeywordTokenizer(reader);
+      return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
    }
  }
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java	(revision 1162347)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java	(revision )
@@ -22,9 +22,7 @@
 import java.util.HashMap;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
@@ -302,22 +300,23 @@
   /**
    * Return empty tokens for field "f1".
   */
-  private static class AnalyzerReturningNull extends Analyzer {
+  private static class AnalyzerReturningNull extends ReusableAnalyzerBase {
     MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
 
     public AnalyzerReturningNull() {
+      super(new PerFieldReuseStrategy());
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new EmptyTokenStream();
+        return new TokenStreamComponents(new EmptyTokenStream());
       } else {
-        return stdAnalyzer.tokenStream(fieldName, reader);
+        return stdAnalyzer.createComponents(fieldName, reader);
      }
    }
 
-    private static class EmptyTokenStream extends TokenStream {
+    private static class EmptyTokenStream extends Tokenizer {
       @Override
       public boolean incrementToken() throws IOException {
         return false;
Index: lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java	(revision )
@@ -17,6 +17,7 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
@@ -342,7 +343,7 @@
     }
   }
 
-  private static class CannedAnalyzer extends Analyzer {
+  private static class CannedAnalyzer extends ReusableAnalyzerBase {
     private final TokenAndPos[] tokens;
 
     public CannedAnalyzer(TokenAndPos[] tokens) {
@@ -350,8 +351,8 @@
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new CannedTokenizer(tokens);
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new CannedTokenizer(tokens));
    }
  }
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java	(revision 1161488)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java	(revision )
@@ -112,12 +112,13 @@
     }
   }
 
-  public static final class QPTestAnalyzer extends Analyzer {
+  public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
 
     /** Filters MockTokenizer with StopFilter. */
     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+    public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
    }
  }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java	(revision 1162347)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java	(revision )
@@ -148,41 +148,7 @@
         new int[] { 6, 9, 9, 12, 12, 18, 18 },
         new int[] { 1, 0, 1, 0, 1, 0, 1 });
   }
-  
+  
-  /*
-   * analyzer that does not support reuse
-   * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
-   */
-  private class NonreusableAnalyzer extends Analyzer {
-    int invocationCount = 0;
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      if (++invocationCount % 2 == 0)
-        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
-      else
-        return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
-    }
-  }
-  
-  public void testWrappedAnalyzerDoesNotReuse() throws Exception {
-    Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 27, 27 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 28, 28 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-    assertAnalyzesToReuse(a, "please divide into shingles.",
-        new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
-        new int[] { 0, 0, 7, 7, 14, 14, 19 },
-        new int[] { 6, 13, 13, 18, 18, 27, 27 },
-        new int[] { 1, 0, 1, 0, 1, 0, 1 });
-  }
-
   public void testNonDefaultMinShingleSize() throws Exception {
     ShingleAnalyzerWrapper analyzer
       = new ShingleAnalyzerWrapper(new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), 3, 4);
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java	(revision 1162347)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java	(revision )
@@ -24,11 +24,7 @@
 import java.util.Collection;
 import java.util.List;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@@ -198,10 +194,10 @@
     return phraseQuery;
   }
 
-  static final class BigramAnalyzer extends Analyzer {
+  static final class BigramAnalyzer extends ReusableAnalyzerBase {
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new BasicNGramTokenizer( reader );
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new BasicNGramTokenizer(reader));
    }
  }
Index: lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestBasics.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/search/spans/TestBasics.java	(revision )
@@ -25,6 +25,7 @@
 import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -70,14 +71,12 @@
   private static Directory directory;
 
   static final class SimplePayloadFilter extends TokenFilter {
-    String fieldName;
     int pos;
     final PayloadAttribute payloadAttr;
     final CharTermAttribute termAttr;
 
-    public SimplePayloadFilter(TokenStream input, String fieldName) {
+    public SimplePayloadFilter(TokenStream input) {
       super(input);
-      this.fieldName = fieldName;
       pos = 0;
       payloadAttr = input.addAttribute(PayloadAttribute.class);
       termAttr = input.addAttribute(CharTermAttribute.class);
@@ -105,7 +104,7 @@
 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName);
+      return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
    }
  };
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java	(revision 1161986)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java	(revision )
@@ -22,9 +22,7 @@
 import java.util.HashSet;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -292,15 +290,15 @@
     return token;
   }
 
-  public static final class TokenArrayAnalyzer extends Analyzer {
-    Token[] tokens;
+  public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
+    final Token[] tokens;
-    public TokenArrayAnalyzer( Token... tokens ){
+    public TokenArrayAnalyzer(Token... tokens) {
       this.tokens = tokens;
     }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
        final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
        int p = 0;
@@ -318,7 +316,7 @@
          this.p = 0;
        }
      };
-      return ts;
+      return new TokenStreamComponents(ts);
    }
  }
 }
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java	(revision )
@@ -23,11 +23,7 @@
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
@@ -179,21 +175,20 @@
     Analyzer analyzer;
     if (random.nextBoolean()) {
       // no payloads
-      analyzer = new Analyzer() {
+      analyzer = new ReusableAnalyzerBase() {
        @Override
-        public TokenStream tokenStream(String fieldName, Reader reader) {
-          return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+        public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
        }
      };
     } else {
       // fixed length payloads
       final int length = random.nextInt(200);
-      analyzer = new Analyzer() {
+      analyzer = new ReusableAnalyzerBase() {
        @Override
-        public TokenStream tokenStream(String fieldName, Reader reader) {
-          return new MockFixedLengthPayloadFilter(random,
-              new MockTokenizer(reader, MockTokenizer.WHITESPACE, true),
-              length);
+        public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+          return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random, tokenizer, length));
        }
      };
    }
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java	(revision 1150091)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java	(revision )
@@ -17,12 +17,7 @@
 
 package org.apache.lucene.analysis.miscellaneous;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -211,12 +206,13 @@
     final CharArraySet protWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("NUTCH")), false);
 
     /* analyzer that uses whitespace + wdf */
-    Analyzer a = new Analyzer() {
+    Analyzer a = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false),
-            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+            tokenizer,
+            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords));
      }
    };
@@ -238,13 +234,14 @@
         new int[] { 1, 1, 1 });
 
     /* analyzer that will consume tokens with large position increments */
-    Analyzer a2 = new Analyzer() {
+    Analyzer a2 = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        return new WordDelimiterFilter(
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
            new LargePosIncTokenFilter(
-                new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
-            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
+                tokenizer),
+            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords));
      }
    };
@@ -271,14 +268,14 @@
         new int[] { 6, 14, 19 },
         new int[] { 1, 11, 1 });
 
-    Analyzer a3 = new Analyzer() {
+    Analyzer a3 = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String field, Reader reader) {
-        StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
-            new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
+      public TokenStreamComponents createComponents(String field, Reader reader) {
+        Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+        StopFilter filter = new StopFilter(TEST_VERSION_CURRENT, tokenizer, StandardAnalyzer.STOP_WORDS_SET);
        filter.setEnablePositionIncrements(true);
-        return new WordDelimiterFilter(filter,
-            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords);
+        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter,
+            1, 1, 0, 0, 1, 1, 0, 1, 1, protWords));
      }
    };
Index: lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java	(revision )
@@ -64,14 +64,16 @@
   private static final byte[] payloadMultiField2 = new byte[]{4};
   protected static Directory directory;
 
-  private static class PayloadAnalyzer extends Analyzer {
+  private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+
+    private PayloadAnalyzer() {
+      super(new PerFieldReuseStrategy());
+    }
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
-      result = new PayloadFilter(result, fieldName);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
    }
  }
Index: lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java	(revision 1162347)
+++ lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java	(revision )
@@ -22,7 +22,7 @@
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
@@ -77,10 +77,10 @@
   void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
     final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
 
-    Analyzer analyzer = new Analyzer() {
+    Analyzer analyzer = new ReusableAnalyzerBase() {
      @Override
-      public TokenStream tokenStream(String fieldName, Reader reader) {
-        return ts;
+      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        return new TokenStreamComponents(ts);
      }
    };
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java	(revision 1161488)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java	(revision )
@@ -122,16 +122,12 @@
    * Expands "multi" to "multi" and "multi2", both at the same position,
    * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
   */
-  private class MultiAnalyzer extends Analyzer {
+  private class MultiAnalyzer extends ReusableAnalyzerBase {
 
-    public MultiAnalyzer() {
-    }
-
    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestFilter(result));
    }
  }
@@ -196,16 +192,12 @@
    * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
    * Does not work correctly for input other than "the quick brown ...".
   */
-  private class PosIncrementAnalyzer extends Analyzer {
+  private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
 
-    public PosIncrementAnalyzer() {
-    }
-
    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
-      result = new TestPosIncrementFilter(result);
-      return result;
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+      return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
    }
  }
Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
===================================================================
--- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java	(revision 948195)
+++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java	(revision )
@@ -25,6 +25,7 @@
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.en.PorterStemFilter;
 import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.analysis.TokenStream;
@@ -54,7 +55,7 @@
 * <p>
 * @lucene.experimental
 */
-public final class SmartChineseAnalyzer extends Analyzer {
+public final class SmartChineseAnalyzer extends ReusableAnalyzerBase {
 
   private final Set<String> stopWords;
 
@@ -141,9 +142,9 @@
   }
 
   @Override
-  public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new SentenceTokenizer(reader);
-    result = new WordTokenFilter(result);
+  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    Tokenizer tokenizer = new SentenceTokenizer(reader);
+    TokenStream result = new WordTokenFilter(tokenizer);
    // result = new LowerCaseFilter(result);
    // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
    // The porter stemming is too strict, this is not a bug, this is a feature:)
@@ -151,32 +152,6 @@
     if (!stopWords.isEmpty()) {
       result = new StopFilter(matchVersion, result, stopWords, false);
     }
-    return result;
+    return new TokenStreamComponents(tokenizer, result);
   }
-
-  private static final class SavedStreams {
-    Tokenizer tokenStream;
-    TokenStream filteredTokenStream;
-  }
+}
-
-  @Override
-  public TokenStream reusableTokenStream(String fieldName, Reader reader)
-      throws IOException {
-    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
-    if (streams == null) {
-      streams = new SavedStreams();
-      setPreviousTokenStream(streams);
-      streams.tokenStream = new SentenceTokenizer(reader);
-      streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
-      streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
-      if (!stopWords.isEmpty()) {
-        streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
-      }
-    } else {
-      streams.tokenStream.reset(reader);
-      streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
-    }
-
-    return streams.filteredTokenStream;
-  }
-}
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java	(revision 1162347)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java	(revision )
@@ -20,9 +20,7 @@
 import java.io.IOException;
 import java.io.Reader;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -50,15 +48,15 @@
 public class TokenSourcesTest extends LuceneTestCase {
   private static final String FIELD = "text";
 
-  private static final class OverlapAnalyzer extends Analyzer {
+  private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
 
     @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new TokenStreamOverlap();
+    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      return new TokenStreamComponents(new TokenStreamOverlap());
    }
  }
 
-  private static final class TokenStreamOverlap extends TokenStream {
+  private static final class TokenStreamOverlap extends Tokenizer {
     private Token[] tokens;
 
     private int i = -1;
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
=================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (revision 1096178) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (revision ) @@ -21,10 +21,7 @@ import java.io.Reader; import java.io.StringReader; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; @@ -120,12 +117,12 @@ String[] y = StandardTokenizer.TOKEN_TYPES; } - private static class LowerCaseWhitespaceAnalyzer extends Analyzer { + private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new LowerCaseFilter(TEST_VERSION_CURRENT, - new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader); + return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer)); } } @@ -237,4 +234,4 @@ data[0]++; return true; } -} \ No newline at end of file +} Index: modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java =================================================================== --- modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision 1162347) +++ modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision ) @@ -25,13 +25,7 @@ import java.util.GregorianCalendar; import java.util.Locale; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenFilter; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -104,12 +98,13 @@ } - public static final class QPTestAnalyzer extends Analyzer { + public static final class QPTestAnalyzer extends ReusableAnalyzerBase { /** Filters MockTokenizer with StopFilter. 
*/ @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer)); } } @@ -245,10 +240,10 @@ } } - private class SimpleCJKAnalyzer extends Analyzer { + private class SimpleCJKAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new SimpleCJKTokenizer(reader); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new SimpleCJKTokenizer(reader)); } } @@ -348,10 +343,10 @@ assertQueryEquals("a OR -b", null, "a -b"); // +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator - Analyzer a = new Analyzer() { + Analyzer a = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)); } }; assertQueryEquals("a - b", a, "a - b"); @@ -1162,18 +1157,19 @@ } /** whitespace+lowercase analyzer with synonyms */ - private class Analyzer1 extends Analyzer { + private class Analyzer1 extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockSynonymFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer)); } } /** whitespace+lowercase analyzer without synonyms */ - private class Analyzer2 extends Analyzer { + private class Analyzer2 extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); } } @@ -1235,10 +1231,11 @@ } } - private class MockCollationAnalyzer extends Analyzer { + private class MockCollationAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + return new TokenStreamComponents(tokenizer, new MockCollationFilter(tokenizer)); } } Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java =================================================================== --- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java (revision 1162347) +++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java (revision ) @@ -21,9 
+21,7 @@ import java.util.HashMap; import java.util.Map; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; @@ -340,22 +338,23 @@ /** * Return empty tokens for field "f1". */ - private static final class AnalyzerReturningNull extends Analyzer { + private static final class AnalyzerReturningNull extends ReusableAnalyzerBase { MockAnalyzer stdAnalyzer = new MockAnalyzer(random); public AnalyzerReturningNull() { + super(new PerFieldReuseStrategy()); } @Override - public TokenStream tokenStream(String fieldName, Reader reader) { + public TokenStreamComponents createComponents(String fieldName, Reader reader) { if ("f1".equals(fieldName)) { - return new EmptyTokenStream(); + return new TokenStreamComponents(new EmptyTokenStream()); } else { - return stdAnalyzer.tokenStream(fieldName, reader); + return stdAnalyzer.createComponents(fieldName, reader); } } - private static class EmptyTokenStream extends TokenStream { + private static class EmptyTokenStream extends Tokenizer { @Override public boolean incrementToken() { return false; Index: lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java (revision ) @@ -54,10 +54,10 @@ @BeforeClass public static void beforeClass() throws Exception { directory = newDirectory(); - Analyzer analyzer = new Analyzer() { + Analyzer analyzer = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)); } @Override Index: modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision 1162347) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision ) @@ -144,32 +144,6 @@ assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); } - /* - * analyzer that does not support reuse - * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even. 
- */ - private class NonreusableAnalyzer extends Analyzer { - int invocationCount = 0; - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - if (++invocationCount % 2 == 0) - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); - else - return new MockTokenizer(reader, MockTokenizer.SIMPLE, false); - } - } - - public void testWrappingNonReusableAnalyzer() throws Exception { - QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer()); - a.addStopWords(reader, 10); - - TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring")); - assertTokenStreamContents(tokenStream, new String[0]); - - tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring")); - assertTokenStreamContents(tokenStream, new String[0]); - } - public void testTokenStream() throws Exception { QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)); a.addStopWords(reader, 10); Index: lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision ) @@ -56,10 +56,10 @@ final static boolean VERBOSE = false; public void testSetPosition() throws Exception { - Analyzer analyzer = new Analyzer() { + Analyzer analyzer = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new TokenStream() { + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new Tokenizer() { private final String[] TOKENS = {"1", "2", "3", "4", "5"}; private final int[] INCREMENTS = {0, 2, 1, 0, 1}; private int i = 0; @@ -85,7 +85,7 @@ super.reset(); this.i = 0; } - }; + }); } }; Directory store = newDirectory(); Index: lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java =================================================================== --- lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (revision ) @@ -55,14 +55,16 @@ public IndexReader reader; - public final class PayloadAnalyzer extends Analyzer { + public final class PayloadAnalyzer extends ReusableAnalyzerBase { + public PayloadAnalyzer() { + super(new PerFieldReuseStrategy()); + } @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); - result = new PayloadFilter(result, fieldName); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(result, fieldName)); } } Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1163568) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision ) @@ -31,11 +31,7 @@ import java.util.Map; import java.util.Random; -import org.apache.lucene.analysis.Analyzer; -import 
org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.BinaryField; @@ -1709,10 +1705,10 @@ dir.close(); } - static final class StringSplitAnalyzer extends Analyzer { + static final class StringSplitAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new StringSplitTokenizer(reader); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new StringSplitTokenizer(reader)); } } Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1162347) +++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision ) @@ -1802,7 +1802,7 @@ // behaviour to synonyms // =================================================================== -final class SynonymAnalyzer extends Analyzer { +final class SynonymAnalyzer extends ReusableAnalyzerBase { private Map synonyms; public SynonymAnalyzer(Map synonyms) { @@ -1816,12 +1816,12 @@ * java.io.Reader) */ @Override - public TokenStream tokenStream(String arg0, Reader arg1) { + public TokenStreamComponents createComponents(String arg0, Reader arg1) { Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true); stream.addAttribute(CharTermAttribute.class); stream.addAttribute(PositionIncrementAttribute.class); stream.addAttribute(OffsetAttribute.class); - return new SynonymTokenizer(stream, synonyms); + return new TokenStreamComponents(stream, new SynonymTokenizer(stream, synonyms)); } } Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision ) @@ -20,11 +20,7 @@ import java.io.IOException; import java.io.Reader; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -107,10 +103,10 @@ } public void testPositionIncrementGap() throws IOException { - Analyzer analyzer = new Analyzer() { + Analyzer analyzer = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)); } @Override @@ -142,10 +138,11 @@ } public void testTokenReuse() throws IOException { - Analyzer 
analyzer = new Analyzer() { + Analyzer analyzer = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) { + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) { boolean first = true; AttributeSource.State state; @@ -187,7 +184,7 @@ final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - }; + }); } }; Index: lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java (revision 1102290) +++ lucene/src/test-framework/org/apache/lucene/analysis/MockPayloadAnalyzer.java (revision ) @@ -30,16 +30,15 @@ * * **/ -public final class MockPayloadAnalyzer extends Analyzer { +public final class MockPayloadAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); - return new MockPayloadFilter(result, fieldName); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + return new TokenStreamComponents(result, new MockPayloadFilter(result, fieldName)); } } - /** * * Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision ) @@ -26,10 +26,7 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; @@ -902,10 +899,10 @@ final Random r = random; Directory dir = newDirectory(); // note this test explicitly disables payloads - final Analyzer analyzer = new Analyzer() { + final Analyzer analyzer = new ReusableAnalyzerBase() { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true)); } }; IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH)); Index: modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java 
=================================================================== --- modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (revision 1145430) +++ modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (revision ) @@ -20,10 +20,7 @@ import java.io.IOException; import java.io.Reader; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.util.LuceneTestCase; @@ -137,14 +134,11 @@ } } -final class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer { - public ASCIIAnalyzer() { - } +final class ASCIIAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); - result = new TestFoldingFilter(result); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new TestFoldingFilter(result)); } } Index: lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java =================================================================== --- lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (revision 1162347) +++ lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (revision ) @@ -23,10 +23,7 @@ import java.util.HashSet; import java.util.Set; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -479,18 +476,16 @@ assertEquals(numSpans, cnt); } - final class PayloadAnalyzer extends Analyzer { + final class PayloadAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); - result = new PayloadFilter(result, fieldName); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(result)); } } final class PayloadFilter extends TokenFilter { - String fieldName; Set entities = new HashSet(); Set nopayload = new HashSet(); int pos; @@ -498,9 +493,8 @@ CharTermAttribute termAtt; PositionIncrementAttribute posIncrAtt; - public PayloadFilter(TokenStream input, String fieldName) { + public PayloadFilter(TokenStream input) { super(input); - this.fieldName = fieldName; pos = 0; entities.add("xx"); entities.add("one"); @@ -536,13 +530,12 @@ } } - public final class TestPayloadAnalyzer extends Analyzer { + public final class TestPayloadAnalyzer extends ReusableAnalyzerBase { @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = 
new MockTokenizer(reader, MockTokenizer.SIMPLE, true); - result = new PayloadFilter(result, fieldName); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + return new TokenStreamComponents(result, new PayloadFilter(result)); } } } Index: lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java =================================================================== --- lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java (revision 1160117) +++ lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java (revision ) @@ -17,8 +17,13 @@ * limitations under the License. */ +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.CloseableThreadLocal; + import java.io.IOException; import java.io.Reader; +import java.util.HashMap; +import java.util.Map; /** * A convenience subclass of Analyzer that makes it easy to implement @@ -38,6 +43,16 @@ */ public abstract class ReusableAnalyzerBase extends Analyzer { + private final ReuseStrategy reuseStrategy; + + public ReusableAnalyzerBase() { + this(new GlobalReuseStrategy()); + } + + public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) { + this.reuseStrategy = reuseStrategy; + } + /** * Creates a new {@link TokenStreamComponents} instance for this analyzer. * @@ -66,14 +81,13 @@ @Override public final TokenStream reusableTokenStream(final String fieldName, final Reader reader) throws IOException { - TokenStreamComponents streamChain = (TokenStreamComponents) - getPreviousTokenStream(); + TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName); final Reader r = initReader(reader); - if (streamChain == null || !streamChain.reset(r)) { - streamChain = createComponents(fieldName, r); - setPreviousTokenStream(streamChain); + if (components == null || !components.reset(r)) { + components = createComponents(fieldName, r); + reuseStrategy.setReusableComponents(fieldName, components); } - return streamChain.getTokenStream(); + return components.getTokenStream(); } /** @@ -98,8 +112,17 @@ protected Reader initReader(Reader reader) { return reader; } - + /** + * {@inheritDoc} + */ + @Override + public void close() { + super.close(); + reuseStrategy.close(); + } + + /** * This class encapsulates the outer components of a token stream. It provides * access to the source ({@link Tokenizer}) and the outer end (sink), an * instance of {@link TokenFilter} which also serves as the @@ -166,4 +189,124 @@ } + /** + * Strategy defining how TokenStreamComponents are reused per call to + * {@link ReusableAnalyzerBase#reusableTokenStream(String, java.io.Reader)}.
+ */ + public static abstract class ReuseStrategy { + + private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>(); + + /** + * Gets the reusable TokenStreamComponents for the field with the given name + * + * @param fieldName Name of the field whose reusable TokenStreamComponents + * are to be retrieved + * @return Reusable TokenStreamComponents for the field, or {@code null} + * if there were no previous components for the field + */ + public abstract TokenStreamComponents getReusableComponents(String fieldName); + + /** + * Stores the given TokenStreamComponents as the reusable components for the + * field with the given name + * + * @param fieldName Name of the field whose TokenStreamComponents are being set + * @param components TokenStreamComponents which are to be reused for the field + */ + public abstract void setReusableComponents(String fieldName, TokenStreamComponents components); + + /** + * Returns the currently stored value + * + * @return Currently stored value or {@code null} if no value is stored + */ + protected final Object getStoredValue() { + try { + return storedValue.get(); + } catch (NullPointerException npe) { + if (storedValue == null) { + throw new AlreadyClosedException("this Analyzer is closed"); + } else { + throw npe; + } + } + } + + /** + * Sets the stored value + * + * @param storedValue Value to store + */ + protected final void setStoredValue(Object storedValue) { + try { + this.storedValue.set(storedValue); + } catch (NullPointerException npe) { + if (this.storedValue == null) { + throw new AlreadyClosedException("this Analyzer is closed"); + } else { + throw npe; + } + } + } + + /** + * Closes the ReuseStrategy, freeing any resources + */ + public void close() { + storedValue.close(); + storedValue = null; + } + } + + /** + * Implementation of {@link ReuseStrategy} that reuses the same components for + * every field. + */ + public final static class GlobalReuseStrategy extends ReuseStrategy { + + /** + * {@inheritDoc} + */ + public TokenStreamComponents getReusableComponents(String fieldName) { + return (TokenStreamComponents) getStoredValue(); + } + + /** + * {@inheritDoc} + */ + public void setReusableComponents(String fieldName, TokenStreamComponents components) { + setStoredValue(components); + } + } + + /** + * Implementation of {@link ReuseStrategy} that reuses components per-field by + * maintaining a Map of TokenStreamComponents per field name. + */ + public static class PerFieldReuseStrategy extends ReuseStrategy { + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + public TokenStreamComponents getReusableComponents(String fieldName) { + Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(); + return componentsPerField != null ?
componentsPerField.get(fieldName) : null; + } + + /** + * {@inheritDoc} + */ + @SuppressWarnings("unchecked") + public void setReusableComponents(String fieldName, TokenStreamComponents components) { + Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue(); + if (componentsPerField == null) { + componentsPerField = new HashMap<String, TokenStreamComponents>(); + setStoredValue(componentsPerField); + } + componentsPerField.put(fieldName, components); + } + } + +} Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java =================================================================== --- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java (revision 1161488) +++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java (revision ) @@ -143,16 +143,12 @@ * Expands "multi" to "multi" and "multi2", both at the same position, and * expands "triplemulti" to "triplemulti", "multi3", and "multi2". */ - private class MultiAnalyzer extends Analyzer { + private class MultiAnalyzer extends ReusableAnalyzerBase { - public MultiAnalyzer() { - } - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); - result = new TestFilter(result); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + return new TokenStreamComponents(result, new TestFilter(result)); } } @@ -214,16 +210,12 @@ * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work * correctly for input other than "the quick brown ...". */ - private class PosIncrementAnalyzer extends Analyzer { + private class PosIncrementAnalyzer extends ReusableAnalyzerBase { - public PosIncrementAnalyzer() { - } - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); - result = new TestPosIncrementFilter(result); - return result; + public TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); + return new TokenStreamComponents(result, new TestPosIncrementFilter(result)); } }
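
Reviewer note (not part of the patch): all of the test conversions above follow one shape, so a minimal sketch of the target pattern may speed up review. This is illustrative only; WhitespaceTokenizer/LowerCaseFilter stand in for whatever chain a given analyzer builds, and Version.LUCENE_40 stands in for the caller's matchVersion:

    import java.io.Reader;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    public final class LowercaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // The source Tokenizer must be the first constructor argument: it is the
        // object whose reset(Reader) is driven when the components are reused.
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40, reader);
        return new TokenStreamComponents(source,
            new LowerCaseFilter(Version.LUCENE_40, source));
      }
    }

This is why the a3 hunk in TestWordDelimiterFilter passes the MockTokenizer, not the intermediate StopFilter, to TokenStreamComponents even though the WordDelimiterFilter consumes the StopFilter.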
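
The handful of analyzers whose chain depends on the field name (PayloadAnalyzer in TestPayloadTermQuery and PayloadHelper, AnalyzerReturningNull in TestMultiFieldQPHelper) additionally pass new PerFieldReuseStrategy() to the super constructor. That is not optional: under the default GlobalReuseStrategy the components cached for one field would be handed back for every other field, silently dropping the field-specific behaviour. A hypothetical sketch (FieldTaggingFilter is a placeholder, not a class in this patch):

    final class FieldSensitiveAnalyzer extends ReusableAnalyzerBase {
      FieldSensitiveAnalyzer() {
        super(new PerFieldReuseStrategy()); // cache one component chain per field name
      }
      @Override
      public TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        // fieldName is baked into the filter at construction time, which is
        // exactly why a chain built for one field must not serve another.
        return new TokenStreamComponents(source, new FieldTaggingFilter(source, fieldName));
      }
    }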
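
Finally, note the new lifecycle: ReuseStrategy keeps its cache in a CloseableThreadLocal, and ReusableAnalyzerBase.close() now releases it. The guarded getStoredValue()/setStoredValue() then turn the NullPointerException from the nulled-out thread local into AlreadyClosedException, so use-after-close fails with a clear message. A sketch, continuing the hypothetical example above:

    Analyzer a = new FieldSensitiveAnalyzer();
    a.reusableTokenStream("body", new StringReader("one two")).close();
    a.close(); // frees the per-thread components cache
    a.reusableTokenStream("body", new StringReader("three"));
    // -> AlreadyClosedException("this Analyzer is closed"), not an opaque NPE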