Index: modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java
===================================================================
--- modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (revision 1167468)
+++ modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java (working copy)
@@ -5,10 +5,7 @@
import java.util.HashSet;
import java.util.Set;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
@@ -140,10 +137,10 @@
DataTokenStream dts2 = new DataTokenStream("2",new SortingIntEncoder(
new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))));
// this test requires that no payloads ever be randomly present!
- final Analyzer noPayloadsAnalyzer = new Analyzer() {
+ final Analyzer noPayloadsAnalyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.KEYWORD, false);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.KEYWORD, false));
}
};
// NOTE: test is wired to LogMP... because test relies on certain docids having payloads
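The hunk above is the template for nearly every change in this patch: the per-call tokenStream(String, Reader) override is replaced by createComponents(String, Reader), and ReusableAnalyzerBase caches the returned TokenStreamComponents per thread so that later calls only reset the existing chain against the new Reader. A minimal before/after sketch (the Version constant and filter choice are illustrative, not taken from this patch):

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    public class MigrationSketch {
      // Before: a fresh tokenizer chain is allocated on every call.
      static final Analyzer LEGACY = new Analyzer() {
        @Override
        public TokenStream tokenStream(String fieldName, Reader reader) {
          return new LowerCaseFilter(Version.LUCENE_CURRENT,
              new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
        }
      };

      // After: the chain is built once; the base class keeps the components
      // and calls their reset(Reader) to reuse them on later invocations.
      static final Analyzer REUSABLE = new ReusableAnalyzerBase() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
          return new TokenStreamComponents(source,
              new LowerCaseFilter(Version.LUCENE_CURRENT, source));
        }
      };
    }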
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiAnalyzer.java (working copy)
@@ -122,16 +122,12 @@
* Expands "multi" to "multi" and "multi2", both at the same position,
* and expands "triplemulti" to "triplemulti", "multi3", and "multi2".
*/
- private class MultiAnalyzer extends Analyzer {
+ private class MultiAnalyzer extends ReusableAnalyzerBase {
- public MultiAnalyzer() {
- }
-
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
- result = new TestFilter(result);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(result, new TestFilter(result));
}
}
@@ -196,16 +192,12 @@
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).
* Does not work correctly for input other than "the quick brown ...".
*/
- private class PosIncrementAnalyzer extends Analyzer {
+ private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
- public PosIncrementAnalyzer() {
- }
-
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
- result = new TestPosIncrementFilter(result);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiFieldQueryParser.java (working copy)
@@ -22,9 +22,7 @@
import java.util.HashMap;
import java.util.Map;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
@@ -302,22 +300,23 @@
/**
* Return empty tokens for field "f1".
*/
- private static class AnalyzerReturningNull extends Analyzer {
+ private static class AnalyzerReturningNull extends ReusableAnalyzerBase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
public AnalyzerReturningNull() {
+ super(new PerFieldReuseStrategy());
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
if ("f1".equals(fieldName)) {
- return new EmptyTokenStream();
+ return new TokenStreamComponents(new EmptyTokenStream());
} else {
- return stdAnalyzer.tokenStream(fieldName, reader);
+ return stdAnalyzer.createComponents(fieldName, reader);
}
}
- private static class EmptyTokenStream extends TokenStream {
+ private static class EmptyTokenStream extends Tokenizer {
@Override
public boolean incrementToken() throws IOException {
return false;
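The super(new PerFieldReuseStrategy()) call added above is easy to miss but load-bearing: this analyzer builds different components depending on fieldName, while ReusableAnalyzerBase's default strategy caches a single TokenStreamComponents per thread regardless of field. Keying the cache per field keeps "f1" from being handed another field's chain on reuse. The same construction in a compiling sketch (field names hypothetical):

    import java.io.Reader;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    final class PerFieldSketch extends ReusableAnalyzerBase {
      PerFieldSketch() {
        // Without this, one cached chain per thread would serve all fields.
        super(new PerFieldReuseStrategy());
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Branching on the field name is what makes per-field reuse necessary.
        Tokenizer source = "id".equals(fieldName)
            ? new KeywordTokenizer(reader)
            : new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
        return new TokenStreamComponents(source);
      }
    }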
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/classic/TestMultiPhraseQueryParsing.java (working copy)
@@ -18,6 +18,7 @@
*/
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -41,7 +42,7 @@
}
}
- private static class CannedAnalyzer extends Analyzer {
+ private static class CannedAnalyzer extends ReusableAnalyzerBase {
private final TokenAndPos[] tokens;
public CannedAnalyzer(TokenAndPos[] tokens) {
@@ -49,8 +50,8 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new CannedTokenizer(tokens);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new CannedTokenizer(tokens));
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiFieldQPHelper.java (working copy)
@@ -21,9 +21,7 @@
import java.util.HashMap;
import java.util.Map;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
@@ -340,22 +338,23 @@
/**
* Return empty tokens for field "f1".
*/
- private static final class AnalyzerReturningNull extends Analyzer {
+ private static final class AnalyzerReturningNull extends ReusableAnalyzerBase {
MockAnalyzer stdAnalyzer = new MockAnalyzer(random);
public AnalyzerReturningNull() {
+ super(new PerFieldReuseStrategy());
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
if ("f1".equals(fieldName)) {
- return new EmptyTokenStream();
+ return new TokenStreamComponents(new EmptyTokenStream());
} else {
- return stdAnalyzer.tokenStream(fieldName, reader);
+ return stdAnalyzer.createComponents(fieldName, reader);
}
}
- private static class EmptyTokenStream extends TokenStream {
+ private static class EmptyTokenStream extends Tokenizer {
@Override
public boolean incrementToken() {
return false;
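A second recurring change, seen in EmptyTokenStream above and again in TokenSourcesTest and IndexTimeSynonymTest further down: stream sources that used to extend TokenStream now extend Tokenizer. The head of a TokenStreamComponents chain must be a Tokenizer because reuse works by rebinding it to the next Reader through Tokenizer.reset(Reader), which a bare TokenStream does not offer. Minimal sketch (class name hypothetical):

    import java.io.IOException;
    import org.apache.lucene.analysis.Tokenizer;

    // A source that never emits tokens; extending Tokenizer (not TokenStream)
    // lets the cached components be rebound to a new Reader between uses.
    final class EmptySource extends Tokenizer {
      @Override
      public boolean incrementToken() throws IOException {
        return false;
      }
    }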
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java (working copy)
@@ -128,12 +128,13 @@
}
}
- public static final class QPTestAnalyzer extends Analyzer {
+ public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */
@Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+ public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
}
}
@@ -344,10 +345,10 @@
}
}
- private class SimpleCJKAnalyzer extends Analyzer {
+ private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new SimpleCJKTokenizer(reader);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
}
}
@@ -1241,10 +1242,10 @@
}
}
- private class CannedAnalyzer extends Analyzer {
+ private class CannedAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String ignored, Reader alsoIgnored) {
- return new CannedTokenStream();
+ public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
+ return new TokenStreamComponents(new CannedTokenStream());
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestMultiAnalyzerQPHelper.java (working copy)
@@ -143,16 +143,12 @@
* Expands "multi" to "multi" and "multi2", both at the same position, and
* expands "triplemulti" to "triplemulti", "multi3", and "multi2".
*/
- private class MultiAnalyzer extends Analyzer {
+ private class MultiAnalyzer extends ReusableAnalyzerBase {
- public MultiAnalyzer() {
- }
-
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
- result = new TestFilter(result);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(result, new TestFilter(result));
}
}
@@ -214,16 +210,12 @@
* Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work
* correctly for input other than "the quick brown ...".
*/
- private class PosIncrementAnalyzer extends Analyzer {
+ private class PosIncrementAnalyzer extends ReusableAnalyzerBase {
- public PosIncrementAnalyzer() {
- }
-
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
- result = new TestPosIncrementFilter(result);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(result, new TestPosIncrementFilter(result));
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java (working copy)
@@ -112,12 +112,13 @@
}
}
- public static final class QPTestAnalyzer extends Analyzer {
+ public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */
@Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+ public final TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (working copy)
@@ -20,10 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.LuceneTestCase;
@@ -137,14 +134,11 @@
}
}
-final class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
- public ASCIIAnalyzer() {
- }
+final class ASCIIAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new TestFoldingFilter(result);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new TestFoldingFilter(result));
}
}
Index: modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
===================================================================
--- modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (revision 1167468)
+++ modules/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java (working copy)
@@ -25,13 +25,7 @@
import java.util.GregorianCalendar;
import java.util.Locale;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -104,12 +98,13 @@
}
- public static final class QPTestAnalyzer extends Analyzer {
+ public static final class QPTestAnalyzer extends ReusableAnalyzerBase {
/** Filters MockTokenizer with StopFilter. */
@Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(tokenizer, new QPTestFilter(tokenizer));
}
}
@@ -245,10 +240,10 @@
}
}
- private class SimpleCJKAnalyzer extends Analyzer {
+ private class SimpleCJKAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new SimpleCJKTokenizer(reader);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new SimpleCJKTokenizer(reader));
}
}
@@ -348,10 +343,10 @@
assertQueryEquals("a OR -b", null, "a -b");
// +,-,! should be directly adjacent to operand (i.e. not separated by whitespace) to be treated as an operator
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
}
};
assertQueryEquals("a - b", a, "a - b");
@@ -1162,18 +1157,19 @@
}
/** whitespace+lowercase analyzer with synonyms */
- private class Analyzer1 extends Analyzer {
+ private class Analyzer1 extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockSynonymFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
}
}
/** whitespace+lowercase analyzer without synonyms */
- private class Analyzer2 extends Analyzer {
+ private class Analyzer2 extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
}
}
@@ -1235,10 +1231,11 @@
}
}
- private class MockCollationAnalyzer extends Analyzer {
+ private class MockCollationAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockCollationFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(tokenizer, new MockCollationFilter(tokenizer));
}
}
Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (revision 1167468)
+++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUFoldingFilter.java (working copy)
@@ -20,20 +20,18 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
/**
* Tests ICUFoldingFilter
*/
public class TestICUFoldingFilter extends BaseTokenStreamTestCase {
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ICUFoldingFilter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
}
};
public void testDefaults() throws IOException {
Index: modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (revision 1167468)
+++ modules/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2Filter.java (working copy)
@@ -20,9 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import com.ibm.icu.text.Normalizer2;
@@ -31,11 +29,11 @@
* Tests the ICUNormalizer2Filter
*/
public class TestICUNormalizer2Filter extends BaseTokenStreamTestCase {
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ICUNormalizer2Filter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
}
};
@@ -61,13 +59,14 @@
}
public void testAlternate() throws IOException {
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ICUNormalizer2Filter(
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
+ tokenizer,
/* specify nfc with decompose to get nfd */
- Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE));
+ Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
}
};
Index: modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java
===================================================================
--- modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (revision 1167468)
+++ modules/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyFilter.java (working copy)
@@ -20,9 +20,7 @@
import com.ibm.icu.text.Collator;
-import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;
@@ -46,7 +44,7 @@
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
- public final class TestAnalyzer extends Analyzer {
+ public final class TestAnalyzer extends ReusableAnalyzerBase {
private Collator _collator;
TestAnalyzer(Collator collator) {
@@ -54,10 +52,9 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new KeywordTokenizer(reader);
- result = new ICUCollationKeyFilter(result, _collator);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(result, new ICUCollationKeyFilter(result, _collator));
}
}
Index: modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
===================================================================
--- modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (revision 1167468)
+++ modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java (working copy)
@@ -25,6 +25,7 @@
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
@@ -54,7 +55,7 @@
*
* @lucene.experimental
*/
-public final class SmartChineseAnalyzer extends Analyzer {
+public final class SmartChineseAnalyzer extends ReusableAnalyzerBase {
private final Set<?> stopWords;
@@ -141,9 +142,9 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new SentenceTokenizer(reader);
- result = new WordTokenFilter(result);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new SentenceTokenizer(reader);
+ TokenStream result = new WordTokenFilter(tokenizer);
// result = new LowerCaseFilter(result);
// LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text.
// The porter stemming is too strict, this is not a bug, this is a feature:)
@@ -151,32 +152,6 @@
if (!stopWords.isEmpty()) {
result = new StopFilter(matchVersion, result, stopWords, false);
}
- return result;
+ return new TokenStreamComponents(tokenizer, result);
}
-
- private static final class SavedStreams {
- Tokenizer tokenStream;
- TokenStream filteredTokenStream;
- }
-
- @Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader)
- throws IOException {
- SavedStreams streams = (SavedStreams) getPreviousTokenStream();
- if (streams == null) {
- streams = new SavedStreams();
- setPreviousTokenStream(streams);
- streams.tokenStream = new SentenceTokenizer(reader);
- streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
- streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
- if (!stopWords.isEmpty()) {
- streams.filteredTokenStream = new StopFilter(matchVersion, streams.filteredTokenStream, stopWords, false);
- }
- } else {
- streams.tokenStream.reset(reader);
- streams.filteredTokenStream.reset(); // reset WordTokenFilter's state
- }
-
- return streams.filteredTokenStream;
- }
}
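This is the one non-test file where hand-rolled reuse is deleted outright: the SavedStreams holder and the reusableTokenStream override duplicated what ReusableAnalyzerBase now does generically (cache the components, rebind the SentenceTokenizer to the new Reader, reset the filter chain). A quick way to convince yourself the behavior is preserved (sketch; assumes the default per-thread reuse strategy):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
    import org.apache.lucene.util.Version;

    public class SmartChineseReuseCheck {
      public static void main(String[] args) throws IOException {
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
        TokenStream first = analyzer.reusableTokenStream("body", new StringReader("我是中国人"));
        TokenStream second = analyzer.reusableTokenStream("body", new StringReader("我是中国人"));
        // Same chain instance both times: the components were cached and
        // reset, as the removed SavedStreams code used to arrange by hand.
        System.out.println(first == second); // expected: true
      }
    }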
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (working copy)
@@ -19,11 +19,8 @@
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -87,11 +84,12 @@
* @return Map
*/
public void testCommonGramsQueryFilter() throws Exception {
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String field, Reader in) {
- return new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
- new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords));
+ public TokenStreamComponents createComponents(String field, Reader in) {
+ Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new CommonGramsQueryFilter(new CommonGramsFilter(TEST_VERSION_CURRENT,
+ tokenizer, commonWords)));
}
};
@@ -156,11 +154,12 @@
}
public void testCommonGramsFilter() throws Exception {
- Analyzer a = new Analyzer() {
+ Analyzer a = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String field, Reader in) {
- return new CommonGramsFilter(TEST_VERSION_CURRENT,
- new MockTokenizer(in, MockTokenizer.WHITESPACE, false), commonWords);
+ public TokenStreamComponents createComponents(String field, Reader in) {
+ Tokenizer tokenizer = new MockTokenizer(in, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new CommonGramsFilter(TEST_VERSION_CURRENT,
+ tokenizer, commonWords));
}
};
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (working copy)
@@ -21,10 +21,7 @@
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
@@ -120,12 +117,12 @@
String[] y = StandardTokenizer.TOKEN_TYPES;
}
- private static class LowerCaseWhitespaceAnalyzer extends Analyzer {
+ private static class LowerCaseWhitespaceAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new LowerCaseFilter(TEST_VERSION_CURRENT,
- new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer));
}
}
@@ -237,4 +234,4 @@
data[0]++;
return true;
}
-}
\ No newline at end of file
+}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (working copy)
@@ -17,12 +17,7 @@
package org.apache.lucene.analysis.miscellaneous;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -245,13 +240,13 @@
new int[] { 1, 1, 1 });
/* analyzer that will consume tokens with large position increments */
- Analyzer a2 = new Analyzer() {
+ Analyzer a2 = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String field, Reader reader) {
- return new WordDelimiterFilter(
- new LargePosIncTokenFilter(
- new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)),
- flags, protWords);
+ public TokenStreamComponents createComponents(String field, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(
+ new LargePosIncTokenFilter(tokenizer),
+ flags, protWords));
}
};
@@ -278,13 +273,14 @@
new int[] { 6, 14, 19 },
new int[] { 1, 11, 1 });
- Analyzer a3 = new Analyzer() {
+ Analyzer a3 = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String field, Reader reader) {
+ public TokenStreamComponents createComponents(String field, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
- new MockTokenizer(reader, MockTokenizer.WHITESPACE, false), StandardAnalyzer.STOP_WORDS_SET);
+ tokenizer, StandardAnalyzer.STOP_WORDS_SET);
filter.setEnablePositionIncrements(true);
- return new WordDelimiterFilter(filter, flags, protWords);
+ return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(filter, flags, protWords));
}
};
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/PatternAnalyzerTest.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.io.StringReader;
import java.util.Arrays;
import java.util.regex.Pattern;
@@ -128,7 +129,7 @@
assertTokenStreamContents(ts, expected);
// analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
- TokenStream ts2 = analyzer.tokenStream("dummy", document);
+ TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
assertTokenStreamContents(ts2, expected);
}
}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (working copy)
@@ -144,32 +144,6 @@
assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
}
- /*
- * analyzer that does not support reuse
- * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
- */
- private class NonreusableAnalyzer extends Analyzer {
- int invocationCount = 0;
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- if (++invocationCount % 2 == 0)
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- else
- return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
- }
- }
-
- public void testWrappingNonReusableAnalyzer() throws Exception {
- QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new NonreusableAnalyzer());
- a.addStopWords(reader, 10);
-
- TokenStream tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("boring"));
- assertTokenStreamContents(tokenStream, new String[0]);
-
- tokenStream = a.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
- assertTokenStreamContents(tokenStream, new String[0]);
- }
-
public void testTokenStream() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false));
a.addStopWords(reader, 10);
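The deletion above (and the parallel one in ShingleAnalyzerWrapperTest just below) is not lost coverage: the scenario it exercised can no longer be constructed. A port of NonreusableAnalyzer to ReusableAnalyzerBase would not alternate tokenizers, because createComponents runs once per cached chain rather than once per tokenization request, as this hypothetical sketch shows:

    import java.io.Reader;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;

    // Hypothetical port: the odd/even switch is effectively dead code, since
    // createComponents is invoked only when no cached components exist yet
    // for the calling thread.
    final class PortedNonreusableAnalyzer extends ReusableAnalyzerBase {
      int invocationCount = 0;

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        CharacterRunAutomaton pattern =
            (++invocationCount % 2 == 0) ? MockTokenizer.WHITESPACE : MockTokenizer.SIMPLE;
        return new TokenStreamComponents(new MockTokenizer(reader, pattern, false));
      }
    }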
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (working copy)
@@ -148,40 +148,6 @@
new int[] { 6, 9, 9, 12, 12, 18, 18 },
new int[] { 1, 0, 1, 0, 1, 0, 1 });
}
-
- /*
- * analyzer that does not support reuse
- * it is LetterTokenizer on odd invocations, WhitespaceTokenizer on even.
- */
- private class NonreusableAnalyzer extends Analyzer {
- int invocationCount = 0;
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- if (++invocationCount % 2 == 0)
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
- else
- return new MockTokenizer(reader, MockTokenizer.SIMPLE, false);
- }
- }
-
- public void testWrappedAnalyzerDoesNotReuse() throws Exception {
- Analyzer a = new ShingleAnalyzerWrapper(new NonreusableAnalyzer());
- assertAnalyzesToReuse(a, "please divide into shingles.",
- new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
- new int[] { 0, 0, 7, 7, 14, 14, 19 },
- new int[] { 6, 13, 13, 18, 18, 27, 27 },
- new int[] { 1, 0, 1, 0, 1, 0, 1 });
- assertAnalyzesToReuse(a, "please divide into shingles.",
- new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles.", "shingles." },
- new int[] { 0, 0, 7, 7, 14, 14, 19 },
- new int[] { 6, 13, 13, 18, 18, 28, 28 },
- new int[] { 1, 0, 1, 0, 1, 0, 1 });
- assertAnalyzesToReuse(a, "please divide into shingles.",
- new String[] { "please", "please divide", "divide", "divide into", "into", "into shingles", "shingles" },
- new int[] { 0, 0, 7, 7, 14, 14, 19 },
- new int[] { 6, 13, 13, 18, 18, 27, 27 },
- new int[] { 1, 0, 1, 0, 1, 0, 1 });
- }
public void testNonDefaultMinShingleSize() throws Exception {
ShingleAnalyzerWrapper analyzer
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/cn/TestChineseTokenizer.java (working copy)
@@ -21,9 +21,7 @@
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;
@@ -66,10 +64,10 @@
* Analyzer that just uses ChineseTokenizer, not ChineseFilter.
* convenience to show the behavior of the tokenizer
*/
- private class JustChineseTokenizerAnalyzer extends Analyzer {
+ private class JustChineseTokenizerAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ChineseTokenizer(reader);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new ChineseTokenizer(reader));
}
}
@@ -77,10 +75,11 @@
* Analyzer that just uses ChineseFilter, not ChineseTokenizer.
* convenience to show the behavior of the filter.
*/
- private class JustChineseFilterAnalyzer extends Analyzer {
+ private class JustChineseFilterAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new ChineseFilter(new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader);
+ return new TokenStreamComponents(tokenizer, new ChineseFilter(tokenizer));
}
}
Index: modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (revision 1167468)
+++ modules/analysis/common/src/test/org/apache/lucene/collation/TestCollationKeyFilter.java (working copy)
@@ -18,9 +18,7 @@
*/
-import org.apache.lucene.analysis.CollationTestBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.util.BytesRef;
@@ -54,7 +52,7 @@
(collator.getCollationKey(secondRangeEndOriginal).toByteArray()));
- public final class TestAnalyzer extends Analyzer {
+ public final class TestAnalyzer extends ReusableAnalyzerBase {
private Collator _collator;
TestAnalyzer(Collator collator) {
@@ -62,10 +60,9 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new KeywordTokenizer(reader);
- result = new CollationKeyFilter(result, _collator);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new KeywordTokenizer(reader);
+ return new TokenStreamComponents(result, new CollationKeyFilter(result, _collator));
}
}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (revision 1167468)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java (working copy)
@@ -124,9 +124,9 @@
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
- protected boolean reset(final Reader reader) throws IOException {
+ protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
- return super.reset(reader);
+ super.reset(reader);
}
};
}
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (revision 1167468)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java (working copy)
@@ -123,9 +123,9 @@
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
- protected boolean reset(final Reader reader) throws IOException {
+ protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
- return super.reset(reader);
+ super.reset(reader);
}
};
}
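Both hunks above track a signature change in ReusableAnalyzerBase.TokenStreamComponents: reset(Reader) now returns void where it previously returned boolean (a false return used to signal that the components refused reuse and fresh ones had to be built). The override pattern itself is unchanged; here it is in a complete, hypothetical analyzer that re-applies a live setting to its cached tokenizer on every reuse, mirroring the maxTokenLength handling above:

    import java.io.IOException;
    import java.io.Reader;
    import org.apache.lucene.analysis.ReusableAnalyzerBase;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public final class LengthCappedAnalyzer extends ReusableAnalyzerBase {
      private volatile int maxTokenLength = 255;

      public void setMaxTokenLength(int length) {
        maxTokenLength = length;
      }

      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final StandardTokenizer src = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
        src.setMaxTokenLength(maxTokenLength);
        return new TokenStreamComponents(src) {
          @Override
          protected void reset(final Reader reader) throws IOException {
            // The cached tokenizer outlives setMaxTokenLength() calls, so the
            // current value is pushed down on each reuse before re-reading.
            src.setMaxTokenLength(LengthCappedAnalyzer.this.maxTokenLength);
            super.reset(reader);
          }
        };
      }
    }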
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (revision 1167468)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java (working copy)
@@ -27,6 +27,7 @@
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -66,7 +67,7 @@
* @deprecated (4.0) use the pattern-based analysis in the analysis/pattern package instead.
*/
@Deprecated
-public final class PatternAnalyzer extends Analyzer {
+public final class PatternAnalyzer extends ReusableAnalyzerBase {
/** "\\W+"; Divides text at non-letters (NOT Character.isLetter(c)) */
public static final Pattern NON_WORD_PATTERN = Pattern.compile("\\W+");
@@ -187,25 +188,21 @@
* the string to tokenize
* @return a new token stream
*/
- public TokenStream tokenStream(String fieldName, String text) {
+ public TokenStreamComponents createComponents(String fieldName, String text) {
// Ideally the Analyzer superclass should have a method with the same signature,
// with a default impl that simply delegates to the StringReader flavour.
if (text == null)
throw new IllegalArgumentException("text must not be null");
- TokenStream stream;
if (pattern == NON_WORD_PATTERN) { // fast path
- stream = new FastStringTokenizer(text, true, toLowerCase, stopWords);
+ return new TokenStreamComponents(new FastStringTokenizer(text, true, toLowerCase, stopWords));
+ } else if (pattern == WHITESPACE_PATTERN) { // fast path
+ return new TokenStreamComponents(new FastStringTokenizer(text, false, toLowerCase, stopWords));
}
- else if (pattern == WHITESPACE_PATTERN) { // fast path
- stream = new FastStringTokenizer(text, false, toLowerCase, stopWords);
- }
- else {
- stream = new PatternTokenizer(text, pattern, toLowerCase);
- if (stopWords != null) stream = new StopFilter(matchVersion, stream, stopWords);
- }
-
- return stream;
+
+ Tokenizer tokenizer = new PatternTokenizer(text, pattern, toLowerCase);
+ TokenStream result = (stopWords != null) ? new StopFilter(matchVersion, tokenizer, stopWords) : tokenizer;
+ return new TokenStreamComponents(tokenizer, result);
}
/**
@@ -220,10 +217,10 @@
* @return a new token stream
*/
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
try {
String text = toString(reader);
- return tokenStream(fieldName, text);
+ return createComponents(fieldName, text);
} catch (IOException e) {
throw new RuntimeException(e);
}
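With the String-flavored tokenStream(String, String) turned into a createComponents overload, external callers that used to hand PatternAnalyzer a raw String now wrap it in a StringReader, which is exactly the adjustment made in PatternAnalyzerTest earlier in this patch. Usage afterwards looks like this (sketch; assumes the existing PatternAnalyzer.DEFAULT_ANALYZER constant):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;

    public class PatternAnalyzerUsage {
      public static void main(String[] args) {
        PatternAnalyzer analyzer = PatternAnalyzer.DEFAULT_ANALYZER;
        // The Reader is drained into a String internally, so the default
        // patterns still hit the FastStringTokenizer fast path.
        TokenStream ts = analyzer.tokenStream("dummy", new StringReader("The quick brown fox"));
      }
    }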
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java (revision 1167468)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java (working copy)
@@ -20,9 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -50,15 +48,15 @@
public class TokenSourcesTest extends LuceneTestCase {
private static final String FIELD = "text";
- private static final class OverlapAnalyzer extends Analyzer {
+ private static final class OverlapAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new TokenStreamOverlap();
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new TokenStreamOverlap());
}
}
- private static final class TokenStreamOverlap extends TokenStream {
+ private static final class TokenStreamOverlap extends Tokenizer {
private Token[] tokens;
private int i = -1;
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (revision 1167468)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/OffsetLimitTokenFilterTest.java (working copy)
@@ -20,10 +20,7 @@
import java.io.Reader;
import java.io.StringReader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
@@ -52,15 +49,14 @@
assertTokenStreamContents(filter, new String[] {"short", "toolong",
"evenmuchlongertext"});
- // TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
- checkOneTermReuse(new Analyzer() {
+ checkOneTermReuse(new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false);
- return new OffsetLimitTokenFilter(tokenizer, 10);
+ return new TokenStreamComponents(tokenizer, new OffsetLimitTokenFilter(tokenizer, 10));
}
}, "llenges", "llenges");
}
-}
\ No newline at end of file
+}
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1167468)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy)
@@ -1802,7 +1802,7 @@
// behaviour to synonyms
// ===================================================================
-final class SynonymAnalyzer extends Analyzer {
+final class SynonymAnalyzer extends ReusableAnalyzerBase {
private Map<String,String> synonyms;
public SynonymAnalyzer(Map<String,String> synonyms) {
@@ -1816,12 +1816,12 @@
* java.io.Reader)
*/
@Override
- public TokenStream tokenStream(String arg0, Reader arg1) {
+ public TokenStreamComponents createComponents(String arg0, Reader arg1) {
Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
stream.addAttribute(CharTermAttribute.class);
stream.addAttribute(PositionIncrementAttribute.class);
stream.addAttribute(OffsetAttribute.class);
- return new SynonymTokenizer(stream, synonyms);
+ return new TokenStreamComponents(stream, new SynonymTokenizer(stream, synonyms));
}
}
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (revision 1167468)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (working copy)
@@ -24,11 +24,7 @@
import java.util.Collection;
import java.util.List;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
@@ -198,10 +194,10 @@
return phraseQuery;
}
- static final class BigramAnalyzer extends Analyzer {
+ static final class BigramAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new BasicNGramTokenizer( reader );
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new BasicNGramTokenizer(reader));
}
}
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java (revision 1167468)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java (working copy)
@@ -22,9 +22,7 @@
import java.util.HashSet;
import java.util.Set;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -292,15 +290,15 @@
return token;
}
- public static final class TokenArrayAnalyzer extends Analyzer {
- Token[] tokens;
- public TokenArrayAnalyzer( Token... tokens ){
+ public static final class TokenArrayAnalyzer extends ReusableAnalyzerBase {
+ final Token[] tokens;
+ public TokenArrayAnalyzer(Token... tokens) {
this.tokens = tokens;
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
int p = 0;
@@ -318,7 +316,7 @@
this.p = 0;
}
};
- return ts;
+ return new TokenStreamComponents(ts);
}
}
}
Index: lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java (working copy)
@@ -54,10 +54,10 @@
@BeforeClass
public static void beforeClass() throws Exception {
directory = newDirectory();
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
}
@Override
Index: lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java (working copy)
@@ -56,10 +56,10 @@
final static boolean VERBOSE = false;
public void testSetPosition() throws Exception {
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new TokenStream() {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new Tokenizer() {
private final String[] TOKENS = {"1", "2", "3", "4", "5"};
private final int[] INCREMENTS = {0, 2, 1, 0, 1};
private int i = 0;
@@ -85,7 +85,7 @@
super.reset();
this.i = 0;
}
- };
+ });
}
};
Directory store = newDirectory();
Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy)
@@ -190,7 +190,7 @@
assertFalse("queries with different inclusive are not equal", query.equals(other));
}
- private static class SingleCharAnalyzer extends Analyzer {
+ private static class SingleCharAnalyzer extends ReusableAnalyzerBase {
private static class SingleCharTokenizer extends Tokenizer {
char[] buffer = new char[1];
@@ -225,20 +225,9 @@
}
@Override
- public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
- Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
- if (tokenizer == null) {
- tokenizer = new SingleCharTokenizer(reader);
- setPreviousTokenStream(tokenizer);
- } else
- tokenizer.reset(reader);
- return tokenizer;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new SingleCharTokenizer(reader));
}
-
- @Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new SingleCharTokenizer(reader);
- }
}
private void initializeIndex(String[] values) throws IOException {
Index: lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java (working copy)
@@ -55,12 +55,11 @@
private static byte[] payload2 = new byte[]{2};
private static byte[] payload4 = new byte[]{4};
- private static class PayloadAnalyzer extends Analyzer {
+ private static class PayloadAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new PayloadFilter(result, fieldName);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
}
}
Index: lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java (working copy)
@@ -64,14 +64,16 @@
private static final byte[] payloadMultiField2 = new byte[]{4};
protected static Directory directory;
- private static class PayloadAnalyzer extends Analyzer {
+ private static class PayloadAnalyzer extends ReusableAnalyzerBase {
+ private PayloadAnalyzer() {
+ super(new PerFieldReuseStrategy());
+ }
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new PayloadFilter(result, fieldName);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
}
}
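
NOTE: PayloadAnalyzer opts into per-field reuse because its PayloadFilter captures fieldName at construction time. Under the default global strategy, one cached chain would serve every field on a thread, as sketched below with invented field names; PerFieldReuseStrategy keys the cache by field name instead. (TestPayloadSpans later in this patch takes the other way out and removes fieldName from its filter.)

    // Default (global) strategy: one cached chain per thread.
    TokenStream a = analyzer.reusableTokenStream("f1", readerA); // chain built with fieldName "f1"
    TokenStream b = analyzer.reusableTokenStream("f2", readerB); // same chain reused: filter still says "f1"
    // PerFieldReuseStrategy caches per thread and field, so the second call
    // builds, and thereafter reuses, a separate "f2" chain.
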
Index: lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java (working copy)
@@ -55,14 +55,16 @@
public IndexReader reader;
- public final class PayloadAnalyzer extends Analyzer {
+ public final class PayloadAnalyzer extends ReusableAnalyzerBase {
+ public PayloadAnalyzer() {
+ super(new PerFieldReuseStrategy());
+ }
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new PayloadFilter(result, fieldName);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new PayloadFilter(result, fieldName));
}
}
Index: lucene/src/test/org/apache/lucene/search/spans/TestBasics.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestBasics.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/spans/TestBasics.java (working copy)
@@ -25,6 +25,7 @@
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -70,14 +71,12 @@
private static Directory directory;
static final class SimplePayloadFilter extends TokenFilter {
- String fieldName;
int pos;
final PayloadAttribute payloadAttr;
final CharTermAttribute termAttr;
- public SimplePayloadFilter(TokenStream input, String fieldName) {
+ public SimplePayloadFilter(TokenStream input) {
super(input);
- this.fieldName = fieldName;
pos = 0;
payloadAttr = input.addAttribute(PayloadAttribute.class);
termAttr = input.addAttribute(CharTermAttribute.class);
@@ -105,7 +104,7 @@
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
- return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true), fieldName);
+ return new SimplePayloadFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
}
};
Index: lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (working copy)
@@ -23,10 +23,7 @@
import java.util.HashSet;
import java.util.Set;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -479,18 +476,16 @@
assertEquals(numSpans, cnt);
}
- final class PayloadAnalyzer extends Analyzer {
+ final class PayloadAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new PayloadFilter(result, fieldName);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new PayloadFilter(result));
}
}
final class PayloadFilter extends TokenFilter {
- String fieldName;
Set entities = new HashSet();
Set nopayload = new HashSet();
int pos;
@@ -498,9 +493,8 @@
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
- public PayloadFilter(TokenStream input, String fieldName) {
+ public PayloadFilter(TokenStream input) {
super(input);
- this.fieldName = fieldName;
pos = 0;
entities.add("xx");
entities.add("one");
@@ -536,13 +530,12 @@
}
}
- public final class TestPayloadAnalyzer extends Analyzer {
+ public final class TestPayloadAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
- result = new PayloadFilter(result, fieldName);
- return result;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(result, new PayloadFilter(result));
}
}
}
Index: lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -342,7 +343,7 @@
}
}
- private static class CannedAnalyzer extends Analyzer {
+ private static class CannedAnalyzer extends ReusableAnalyzerBase {
private final TokenAndPos[] tokens;
public CannedAnalyzer(TokenAndPos[] tokens) {
@@ -350,8 +351,8 @@
}
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new CannedTokenizer(tokens);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new CannedTokenizer(tokens));
}
}
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterCommit.java (working copy)
@@ -23,11 +23,7 @@
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
@@ -179,21 +175,20 @@
Analyzer analyzer;
if (random.nextBoolean()) {
// no payloads
- analyzer = new Analyzer() {
+ analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
}
};
} else {
// fixed length payloads
final int length = random.nextInt(200);
- analyzer = new Analyzer() {
+ analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockFixedLengthPayloadFilter(random,
- new MockTokenizer(reader, MockTokenizer.WHITESPACE, true),
- length);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(tokenizer, new MockFixedLengthPayloadFilter(random, tokenizer, length));
}
};
}
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy)
@@ -27,11 +27,7 @@
import java.util.List;
import java.util.Random;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -390,12 +386,12 @@
doc.add(newField("field", "a field", TextField.TYPE_STORED));
w.addDocument(doc);
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- return new CrashingFilter(fieldName, tokenizer);
+ return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
}
};
@@ -458,13 +454,13 @@
// LUCENE-1072
public void testExceptionFromTokenStream() throws IOException {
Directory dir = newDirectory();
- IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new Analyzer() {
+ IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- return new TokenFilter(tokenizer) {
+ return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
private int count = 0;
@Override
@@ -480,7 +476,7 @@
super.reset();
this.count = 0;
}
- };
+ });
}
});
@@ -595,12 +591,12 @@
}
public void testDocumentsWriterExceptions() throws IOException {
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- return new CrashingFilter(fieldName, tokenizer);
+ return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
}
};
@@ -691,12 +687,12 @@
}
public void testDocumentsWriterExceptionThreads() throws Exception {
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
- return new CrashingFilter(fieldName, tokenizer);
+ return new TokenStreamComponents(tokenizer, new CrashingFilter(fieldName, tokenizer));
}
};
Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -20,11 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -107,10 +103,10 @@
}
public void testPositionIncrementGap() throws IOException {
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
}
@Override
@@ -142,10 +138,11 @@
}
public void testTokenReuse() throws IOException {
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
+ return new TokenStreamComponents(tokenizer, new TokenFilter(tokenizer) {
boolean first = true;
AttributeSource.State state;
@@ -187,7 +184,7 @@
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- };
+ });
}
};
Index: lucene/src/test/org/apache/lucene/index/TestPayloads.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestPayloads.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestPayloads.java (working copy)
@@ -25,11 +25,7 @@
import java.util.List;
import java.util.Map;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
@@ -105,12 +101,12 @@
// so this field is used to check if the DocumentWriter correctly enables the payloads bit
// even if only some term positions have payloads
d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
- d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED));
+ d.add(newField("f2", "This field has payloads in all docs NO PAYLOAD", TextField.TYPE_UNSTORED));
// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
// enabled in only some documents
d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED));
// only add payload data for field f2
- analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1);
+ analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
writer.addDocument(d);
// flush
writer.close();
Index: lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java (working copy)
@@ -20,8 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -64,10 +63,10 @@
}
}
-final class BugReproAnalyzer extends Analyzer{
+final class BugReproAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String arg0, Reader arg1) {
- return new BugReproAnalyzerTokenizer();
+ public TokenStreamComponents createComponents(String arg0, Reader arg1) {
+ return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
}
}
Index: lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java (working copy)
@@ -20,10 +20,7 @@
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.codecs.CodecProvider;
@@ -71,10 +68,10 @@
private void createIndex(int numHits) throws IOException {
int numDocs = 500;
- final Analyzer analyzer = new Analyzer() {
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
}
};
Directory directory = new SeekCountingDirectory(new RAMDirectory());
Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy)
@@ -137,7 +137,7 @@
super.tearDown();
}
- private class MyTokenStream extends TokenStream {
+ private class MyTokenStream extends Tokenizer {
private int tokenUpto;
private final CharTermAttribute termAtt;
@@ -175,10 +175,10 @@
}
}
- private class MyAnalyzer extends Analyzer {
+ private class MyAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MyTokenStream();
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MyTokenStream());
}
}
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy)
@@ -31,11 +31,7 @@
import java.util.Map;
import java.util.Random;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.BinaryField;
@@ -1710,10 +1706,10 @@
dir.close();
}
- static final class StringSplitAnalyzer extends Analyzer {
+ static final class StringSplitAnalyzer extends ReusableAnalyzerBase {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new StringSplitTokenizer(reader);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new StringSplitTokenizer(reader));
}
}
Index: lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java (working copy)
@@ -21,10 +21,7 @@
import java.io.Reader;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
@@ -114,11 +111,12 @@
assertEquals("Wrong payload for the target " + target + ": " + b.bytes[b.offset], (byte) target, b.bytes[b.offset]);
}
- private static class PayloadAnalyzer extends Analyzer {
+ private static class PayloadAnalyzer extends ReusableAnalyzerBase {
private final AtomicInteger payloadCount = new AtomicInteger(-1);
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new PayloadFilter(payloadCount, new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ return new TokenStreamComponents(tokenizer, new PayloadFilter(payloadCount, tokenizer));
}
}
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy)
@@ -26,10 +26,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
@@ -902,10 +899,10 @@
final Random r = random;
Directory dir = newDirectory();
// note this test explicitly disables payloads
- final Analyzer analyzer = new Analyzer() {
+ final Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
}
};
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
Index: lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java (working copy)
@@ -22,7 +22,7 @@
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
@@ -77,10 +77,10 @@
void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);
- Analyzer analyzer = new Analyzer() {
+ Analyzer analyzer = new ReusableAnalyzerBase() {
@Override
- public TokenStream tokenStream(String fieldName, Reader reader) {
- return ts;
+ public TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(ts);
}
};
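
NOTE: Returning the pre-built, shared ts from createComponents works here only because the test is single threaded: the base class caches components per thread, so every document on this thread gets ts back and merely resets it between uses. A sketch of that assumption ('someReader' is invented for this note):

    // Single thread: each reusableTokenStream call yields the same cached
    // components wrapping 'ts'; a second thread would build its own components
    // but still wrap the one shared 'ts' instance and race on its state.
    TokenStream first = analyzer.reusableTokenStream("field", someReader);
    TokenStream again = analyzer.reusableTokenStream("field", someReader);
    assert first == again; // same instance, reset between uses

This also assumes RepeatingTokenStream extends Tokenizer, which the import change above suggests but this hunk does not show.
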
Index: lucene/src/test/org/apache/lucene/TestAssertions.java
===================================================================
--- lucene/src/test/org/apache/lucene/TestAssertions.java (revision 1167468)
+++ lucene/src/test/org/apache/lucene/TestAssertions.java (working copy)
@@ -19,6 +19,7 @@
import java.io.Reader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -34,32 +35,36 @@
}
}
- static class TestAnalyzer1 extends Analyzer {
+ static class TestAnalyzer1 extends ReusableAnalyzerBase {
+
@Override
- public final TokenStream tokenStream(String s, Reader r) { return null; }
- @Override
- public final TokenStream reusableTokenStream(String s, Reader r) { return null; }
+ protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+ return null;
+ }
}
- static final class TestAnalyzer2 extends Analyzer {
+ static final class TestAnalyzer2 extends ReusableAnalyzerBase {
+
@Override
- public TokenStream tokenStream(String s, Reader r) { return null; }
- @Override
- public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+ protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+ return null;
+ }
}
- static class TestAnalyzer3 extends Analyzer {
+ static class TestAnalyzer3 extends ReusableAnalyzerBase {
+
@Override
- public TokenStream tokenStream(String s, Reader r) { return null; }
- @Override
- public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+ protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+ return null;
+ }
}
- static class TestAnalyzer4 extends Analyzer {
+ static class TestAnalyzer4 extends ReusableAnalyzerBase {
+
@Override
- public final TokenStream tokenStream(String s, Reader r) { return null; }
- @Override
- public TokenStream reusableTokenStream(String s, Reader r) { return null; }
+ protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
+ return null;
+ }
}
static class TestTokenStream1 extends TokenStream {
Index: lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java
===================================================================
--- lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java (revision 1167468)
+++ lucene/src/java/org/apache/lucene/analysis/ReusableAnalyzerBase.java (working copy)
@@ -17,8 +17,13 @@
* limitations under the License.
*/
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.CloseableThreadLocal;
+
import java.io.IOException;
import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
/**
 * A convenience subclass of Analyzer that makes it easy to implement
@@ -38,6 +43,16 @@
*/
public abstract class ReusableAnalyzerBase extends Analyzer {
+ private final ReuseStrategy reuseStrategy;
+
+ public ReusableAnalyzerBase() {
+ this(new GlobalReuseStrategy());
+ }
+
+ public ReusableAnalyzerBase(ReuseStrategy reuseStrategy) {
+ this.reuseStrategy = reuseStrategy;
+ }
+
/**
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
*
@@ -66,14 +81,15 @@
@Override
public final TokenStream reusableTokenStream(final String fieldName,
final Reader reader) throws IOException {
- TokenStreamComponents streamChain = (TokenStreamComponents)
- getPreviousTokenStream();
+ TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
final Reader r = initReader(reader);
- if (streamChain == null || !streamChain.reset(r)) {
- streamChain = createComponents(fieldName, r);
- setPreviousTokenStream(streamChain);
+ if (components == null) {
+ components = createComponents(fieldName, r);
+ reuseStrategy.setReusableComponents(fieldName, components);
+ } else {
+ components.reset(r);
}
- return streamChain.getTokenStream();
+ return components.getTokenStream();
}
/**
@@ -98,8 +114,17 @@
protected Reader initReader(Reader reader) {
return reader;
}
-
+
/**
+ * {@inheritDoc}
+ */
+ @Override
+ public void close() {
+ super.close();
+ reuseStrategy.close();
+ }
+
+ /**
* This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an
* instance of {@link TokenFilter} which also serves as the
@@ -137,22 +162,16 @@
}
/**
- * Resets the encapsulated components with the given reader. This method by
- * default returns true indicating that the components have
- * been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
- * their own {@link TokenStreamComponents} returning false if
- * the components cannot be reset.
+ * Resets the encapsulated components with the given reader. If the components
+ * cannot be reset, an Exception should be thrown.
*
* @param reader
* a reader to reset the source component
- * @return true if the components were reset, otherwise
- * false
* @throws IOException
* if the component's reset method throws an {@link IOException}
*/
- protected boolean reset(final Reader reader) throws IOException {
+ protected void reset(final Reader reader) throws IOException {
source.reset(reader);
- return true;
}
/**
@@ -166,4 +185,124 @@
}
+ /**
+ * Strategy defining how TokenStreamComponents are reused per call to
+ * {@link ReusableAnalyzerBase#tokenStream(String, java.io.Reader)}.
+ */
+ public static abstract class ReuseStrategy {
+
+ private CloseableThreadLocal
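
NOTE: For orientation, a usage sketch of the two strategies being introduced: GlobalReuseStrategy is what the no-arg constructor added above installs, and PerFieldReuseStrategy is what the payload tests earlier in this patch pass in. Illustrative only; 'random' is assumed in scope, as it is in those tests.

    // Default: one cached TokenStreamComponents per thread, shared by all fields.
    Analyzer global = new ReusableAnalyzerBase() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
      }
    };

    // Per field: one cached TokenStreamComponents per thread and field, for
    // chains whose filters depend on the field name.
    Analyzer perField = new ReusableAnalyzerBase(new ReusableAnalyzerBase.PerFieldReuseStrategy()) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(source, new MockFixedLengthPayloadFilter(random, source, 8));
      }
    };
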