Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (revision 1500768) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -112,7 +111,7 @@ * separates by {@link SynonymMap#WORD_SEPARATOR}. * reuse and its chars must not be null. */ public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException { - TokenStream ts = analyzer.tokenStream("", new StringReader(text)); + TokenStream ts = analyzer.tokenStream("", text); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (working copy) @@ -24,7 +24,6 @@ import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; -import java.io.StringReader; import java.io.IOException; import java.util.Iterator; import java.util.Set; @@ -47,8 +46,7 @@ public void testDefaults() throws IOException { assertTrue(stop != null); - StringReader reader = new StringReader("This is a test of the english stop analyzer"); - TokenStream stream = stop.tokenStream("test", reader); + TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer"); assertTrue(stream != null); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); stream.reset(); @@ -61,8 +59,7 @@ public void testStopList() throws IOException { CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet); - StringReader reader = new StringReader("This is a good test of the english stop analyzer"); - TokenStream stream = newStop.tokenStream("test", reader); + TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer"); assertNotNull(stream); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); @@ -76,9 +73,9 @@ public void testStopListPositions() throws IOException { CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); - StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions"); - int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; - TokenStream stream = newStop.tokenStream("test", reader); + String s = "This is a good test of the english stop analyzer with positions"; + int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; + TokenStream stream = newStop.tokenStream("test", s); assertNotNull(stream); int i = 0; CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); Index: 
lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java (working copy) @@ -18,13 +18,10 @@ */ import java.io.IOException; -import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; @@ -47,14 +44,14 @@ Analyzer a = new LimitTokenCountAnalyzer(mock, 2, consumeAll); // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)! - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null); - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : null); + assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null); + assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 9 : null); // less than the limit, ensure we behave correctly - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 ")), new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null); + assertTokenStreamContents(a.tokenStream("dummy", "1 "), new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null); // equal to limit - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 ")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null); + assertTokenStreamContents(a.tokenStream("dummy", "1 2 "), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null); } } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenPositionFilter.java (working copy) @@ -43,17 +43,17 @@ }; // dont use assertAnalyzesTo here, as the end offset is not the end of the string (unless consumeAll is true, in which case its correct)! - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), + assertTokenStreamContents(a.tokenStream("dummy", "1 2 3 4 5"), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 16 : null); assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, consumeAll ? 
9 : null); // less than the limit, ensure we behave correctly - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 ")), + assertTokenStreamContents(a.tokenStream("dummy", "1 "), new String[] { "1" }, new int[] { 0 }, new int[] { 1 }, consumeAll ? 3 : null); // equal to limit - assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 ")), + assertTokenStreamContents(a.tokenStream("dummy", "1 2 "), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, consumeAll ? 6 : null); } } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java (working copy) @@ -1,7 +1,6 @@ package org.apache.lucene.analysis.miscellaneous; import java.io.Reader; -import java.io.StringReader; import java.util.Collections; import java.util.HashMap; import java.util.Map; @@ -38,8 +37,7 @@ PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField); - TokenStream tokenStream = analyzer.tokenStream("field", - new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream("field", text); CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); tokenStream.reset(); @@ -48,8 +46,7 @@ "Qwerty", termAtt.toString()); - tokenStream = analyzer.tokenStream("special", - new StringReader(text)); + tokenStream = analyzer.tokenStream("special", text); termAtt = tokenStream.getAttribute(CharTermAttribute.class); tokenStream.reset(); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (working copy) @@ -26,7 +26,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.RAMDirectory; -import java.io.StringReader; import java.util.Arrays; import java.util.Collections; @@ -66,44 +65,44 @@ public void testNoStopwords() throws Exception { // Note: an empty list of fields passed in protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.emptyList(), 1); - TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", new StringReader("quick")); + TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "quick"); assertTokenStreamContents(protectedTokenStream, new String[]{"quick"}); - protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); } public void testDefaultStopwordsAllFields() throws Exception { protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader); - TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + TokenStream protectedTokenStream = 
protectedAnalyzer.tokenStream("repetitiveField", "boring"); assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring } public void testStopwordsAllFieldsMaxPercentDocs() throws Exception { protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f); - TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on terms in > one half of docs remove boring assertTokenStreamContents(protectedTokenStream, new String[0]); - protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("vaguelyboring")); + protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring"); // A filter on terms in > half of docs should not remove vaguelyBoring assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"}); protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f); - protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("vaguelyboring")); + protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "vaguelyboring"); // A filter on terms in > quarter of docs should remove vaguelyBoring assertTokenStreamContents(protectedTokenStream, new String[0]); } public void testStopwordsPerFieldMaxPercentDocs() throws Exception { protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f); - TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on one Field should not affect queries on another assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f); - protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // A filter on the right Field should affect queries on it assertTokenStreamContents(protectedTokenStream, new String[0]); } @@ -121,11 +120,11 @@ public void testNoFieldNamePollution() throws Exception { protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10); - TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring")); + TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", "boring"); // Check filter set up OK assertTokenStreamContents(protectedTokenStream, new String[0]); - protectedTokenStream = protectedAnalyzer.tokenStream("variedField", new StringReader("boring")); + protectedTokenStream = protectedAnalyzer.tokenStream("variedField", "boring"); // Filter should not prevent stopwords in one field being used in another assertTokenStreamContents(protectedTokenStream, new String[]{"boring"}); } @@ -134,7 +133,7 @@ QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer( TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false), reader, 10); - TokenStream ts = 
a.tokenStream("repetitiveField", new StringReader("this boring")); + TokenStream ts = a.tokenStream("repetitiveField", "this boring"); assertTokenStreamContents(ts, new String[] { "this" }); } } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java (working copy) @@ -17,8 +17,6 @@ * limitations under the License. */ -import java.io.StringReader; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.MockAnalyzer; @@ -97,7 +95,7 @@ public void testShingleAnalyzerWrapperPhraseQuery() throws Exception { PhraseQuery q = new PhraseQuery(); - TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence")); + TokenStream ts = analyzer.tokenStream("content", "this sentence"); int j = -1; PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); @@ -123,7 +121,7 @@ public void testShingleAnalyzerWrapperBooleanQuery() throws Exception { BooleanQuery q = new BooleanQuery(); - TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence")); + TokenStream ts = analyzer.tokenStream("content", "test sentence"); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (working copy) @@ -91,7 +91,7 @@ Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - TokenStream tokenStream = analyzer.tokenStream("field", new StringReader("abcd ")); + TokenStream tokenStream = analyzer.tokenStream("field", "abcd "); TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream); TokenStream sink = tee.newSinkTokenStream(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -30,7 +29,6 @@ import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.Version; /** * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer @@ -122,10 +120,10 @@ public void testAttributeReuse() throws Exception { ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT); // just consume - TokenStream ts = 
analyzer.tokenStream("dummy", new StringReader("ภาษาไทย")); + TokenStream ts = analyzer.tokenStream("dummy", "ภาษาไทย"); assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); // this consumer adds flagsAtt, which this analyzer does not use. - ts = analyzer.tokenStream("dummy", new StringReader("ภาษาไทย")); + ts = analyzer.tokenStream("dummy", "ภาษาไทย"); ts.addAttribute(FlagsAttribute.class); assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" }); } Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java =================================================================== --- lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (revision 1500768) +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java (working copy) @@ -123,7 +123,7 @@ int num = 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String s = _TestUtil.randomUnicodeString(random()); - TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + TokenStream ts = analyzer.tokenStream("foo", s); ts.reset(); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); while (ts.incrementToken()) { @@ -161,7 +161,7 @@ int num = 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String s = _TestUtil.randomUnicodeString(random()); - TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + TokenStream ts = analyzer.tokenStream("foo", s); ts.reset(); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); while (ts.incrementToken()) { Index: lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java =================================================================== --- lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (revision 1500768) +++ lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java (working copy) @@ -249,7 +249,7 @@ } public void testTokenAttributes() throws Exception { - TokenStream ts = a.tokenStream("dummy", new StringReader("This is a test")); + TokenStream ts = a.tokenStream("dummy", "This is a test"); ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class); ts.reset(); while (ts.incrementToken()) { Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java =================================================================== --- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java (revision 1500768) +++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -54,7 +53,7 @@ int numIterations = atLeast(1000); for (int i = 0; i < numIterations; i++) { String s = _TestUtil.randomUnicodeString(random(), 100); - TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + TokenStream ts = analyzer.tokenStream("foo", s); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java =================================================================== --- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java (revision 1500768) +++ 
lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseAnalyzer.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -151,7 +150,7 @@ Mode.SEARCH, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags()); - assertTokenStreamContents(a.tokenStream("foo", new StringReader("abcd")), + assertTokenStreamContents(a.tokenStream("foo", "abcd"), new String[] { "a", "b", "cd" }, new int[] { 0, 1, 2 }, new int[] { 1, 2, 4 }, Index: lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java =================================================================== --- lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (revision 1500768) +++ lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java (working copy) @@ -22,7 +22,6 @@ import java.io.InputStreamReader; import java.io.LineNumberReader; import java.io.Reader; -import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.Analyzer; @@ -142,7 +141,7 @@ * ideally the test would actually fail instead of hanging... */ public void testDecomposition5() throws Exception { - TokenStream ts = analyzer.tokenStream("bogus", new StringReader("くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ")); + TokenStream ts = analyzer.tokenStream("bogus", "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ"); ts.reset(); while (ts.incrementToken()) { @@ -166,8 +165,8 @@ /** Tests that sentence offset is incorporated into the resulting offsets */ public void testTwoSentences() throws Exception { /* - //TokenStream ts = a.tokenStream("foo", new StringReader("妹の咲子です。俺と年子で、今受験生です。")); - TokenStream ts = analyzer.tokenStream("foo", new StringReader("�?>-->;")); + //TokenStream ts = a.tokenStream("foo", "妹の咲子です。俺と年子で、今受験生です。"); + TokenStream ts = analyzer.tokenStream("foo", "�?>-->;"); ts.reset(); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while(ts.incrementToken()) { @@ -214,7 +213,7 @@ public void testLargeDocReliability() throws Exception { for (int i = 0; i < 100; i++) { String s = _TestUtil.randomUnicodeString(random(), 10000); - TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + TokenStream ts = analyzer.tokenStream("foo", s); ts.reset(); while (ts.incrementToken()) { } @@ -235,7 +234,7 @@ System.out.println("\nTEST: iter=" + i); } String s = _TestUtil.randomUnicodeString(random(), 100); - TokenStream ts = analyzer.tokenStream("foo", new StringReader(s)); + TokenStream ts = analyzer.tokenStream("foo", s); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { @@ -245,14 +244,14 @@ } public void testOnlyPunctuation() throws IOException { - TokenStream ts = analyzerNoPunct.tokenStream("foo", new StringReader("。、。。")); + TokenStream ts = analyzerNoPunct.tokenStream("foo", "。、。。"); ts.reset(); assertFalse(ts.incrementToken()); ts.end(); } public void testOnlyPunctuationExtended() throws IOException { - TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", new StringReader("......")); + TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......"); ts.reset(); assertFalse(ts.incrementToken()); ts.end(); @@ -261,14 +260,14 @@ // note: test is kinda silly since kuromoji emits punctuation tokens. // but, when/if we filter these out it will be useful. 
public void testEnd() throws Exception { - assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", new StringReader("これは本ではない")), + assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", "これは本ではない"), new String[] { "これ", "は", "本", "で", "は", "ない" }, new int[] { 0, 2, 3, 4, 5, 6 }, new int[] { 2, 3, 4, 5, 6, 8 }, new Integer(8) ); - assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", new StringReader("これは本ではない ")), + assertTokenStreamContents(analyzerNoPunct.tokenStream("foo", "これは本ではない "), new String[] { "これ", "は", "本", "で", "は", "ない" }, new int[] { 0, 2, 3, 4, 5, 6, 8 }, new int[] { 2, 3, 4, 5, 6, 8, 9 }, @@ -279,7 +278,7 @@ public void testUserDict() throws Exception { // Not a great test because w/o userdict.txt the // segmentation is the same: - assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("関西国際空港に行った")), + assertTokenStreamContents(analyzer.tokenStream("foo", "関西国際空港に行った"), new String[] { "関西", "国際", "空港", "に", "行っ", "た" }, new int[] { 0, 2, 4, 6, 7, 9 }, new int[] { 2, 4, 6, 7, 9, 10 }, @@ -289,7 +288,7 @@ public void testUserDict2() throws Exception { // Better test: w/o userdict the segmentation is different: - assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("朝青龍")), + assertTokenStreamContents(analyzer.tokenStream("foo", "朝青龍"), new String[] { "朝青龍" }, new int[] { 0 }, new int[] { 3 }, @@ -299,7 +298,7 @@ public void testUserDict3() throws Exception { // Test entry that breaks into multiple tokens: - assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("abcd")), + assertTokenStreamContents(analyzer.tokenStream("foo", "abcd"), new String[] { "a", "b", "cd" }, new int[] { 0, 1, 2 }, new int[] { 1, 2, 4 }, @@ -315,7 +314,7 @@ /* public void testUserDict4() throws Exception { // Test entry that has another entry as prefix - assertTokenStreamContents(analyzer.tokenStream("foo", new StringReader("abcdefghij")), + assertTokenStreamContents(analyzer.tokenStream("foo", "abcdefghij"), new String[] { "ab", "cd", "efg", "hij" }, new int[] { 0, 2, 4, 7 }, new int[] { 2, 4, 7, 10 }, @@ -366,7 +365,7 @@ } private void assertReadings(String input, String... readings) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); ts.reset(); for(String reading : readings) { @@ -378,7 +377,7 @@ } private void assertPronunciations(String input, String... pronunciations) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); ts.reset(); for(String pronunciation : pronunciations) { @@ -390,7 +389,7 @@ } private void assertBaseForms(String input, String... baseForms) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class); ts.reset(); for(String baseForm : baseForms) { @@ -402,7 +401,7 @@ } private void assertInflectionTypes(String input, String... 
inflectionTypes) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); ts.reset(); for(String inflectionType : inflectionTypes) { @@ -414,7 +413,7 @@ } private void assertInflectionForms(String input, String... inflectionForms) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); ts.reset(); for(String inflectionForm : inflectionForms) { @@ -426,7 +425,7 @@ } private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", new StringReader(input)); + TokenStream ts = analyzer.tokenStream("ignored", input); PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class); ts.reset(); for(String partOfSpeech : partsOfSpeech) { @@ -619,7 +618,7 @@ if (numIterations > 1) { // warmup for (int i = 0; i < numIterations; i++) { - final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(line)); + final TokenStream ts = analyzer.tokenStream("ignored", line); ts.reset(); while(ts.incrementToken()); } @@ -628,7 +627,7 @@ long totalStart = System.currentTimeMillis(); for (int i = 0; i < numIterations; i++) { - final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(line)); + final TokenStream ts = analyzer.tokenStream("ignored", line); ts.reset(); while(ts.incrementToken()); } @@ -640,7 +639,7 @@ totalStart = System.currentTimeMillis(); for (int i = 0; i < numIterations; i++) { for (String sentence: sentences) { - final TokenStream ts = analyzer.tokenStream("ignored", new StringReader(sentence)); + final TokenStream ts = analyzer.tokenStream("ignored", sentence); ts.reset(); while(ts.incrementToken()); } Index: lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java =================================================================== --- lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (revision 1500768) +++ lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.TreeSet; import org.apache.lucene.analysis.Analyzer; @@ -73,7 +72,7 @@ @SuppressWarnings("unused") private void dumpTokens(String input) throws IOException { - TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input)); + TokenStream ts = getTestAnalyzer().tokenStream("dummy", input); ts.reset(); MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class); @@ -86,7 +85,7 @@ /** Test reuse of MorfologikFilter with leftover stems. 
*/ public final void testLeftoverStems() throws IOException { Analyzer a = getTestAnalyzer(); - TokenStream ts_1 = a.tokenStream("dummy", new StringReader("liście")); + TokenStream ts_1 = a.tokenStream("dummy", "liście"); CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class); ts_1.reset(); ts_1.incrementToken(); @@ -94,7 +93,7 @@ ts_1.end(); ts_1.close(); - TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych")); + TokenStream ts_2 = a.tokenStream("dummy", "danych"); CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class); ts_2.reset(); ts_2.incrementToken(); @@ -141,7 +140,7 @@ /** Test morphosyntactic annotations. */ public final void testPOSAttribute() throws IOException { - TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście")); + TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście"); ts.reset(); assertPOSToken(ts, "liście", Index: lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java =================================================================== --- lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (revision 1500768) +++ lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.Random; import org.apache.lucene.analysis.BaseTokenStreamTestCase; @@ -185,7 +184,7 @@ sb.append("我购买了道具和服装。"); } Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = analyzer.tokenStream("", new StringReader(sb.toString())); + TokenStream stream = analyzer.tokenStream("", sb.toString()); stream.reset(); while (stream.incrementToken()) { } @@ -198,7 +197,7 @@ sb.append("我购买了道具和服装"); } Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = analyzer.tokenStream("", new StringReader(sb.toString())); + TokenStream stream = analyzer.tokenStream("", sb.toString()); stream.reset(); while (stream.incrementToken()) { } Index: lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java =================================================================== --- lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (revision 1500768) +++ lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (working copy) @@ -35,7 +35,6 @@ import org.junit.Before; import org.junit.Test; -import java.io.StringReader; import java.util.HashMap; import java.util.Map; @@ -62,7 +61,7 @@ @Test public void baseUIMAAnalyzerStreamTest() throws Exception { - TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood")); + TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood"); assertTokenStreamContents(ts, new String[]{"the", "big", "brown", "fox", "jumped", "on", "the", "wood"}); } Index: lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java =================================================================== --- lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (revision 1500768) +++ lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (working copy) @@ -23,8 +23,6 @@ import org.junit.Before; import org.junit.Test; -import 
java.io.StringReader; - /** * Testcase for {@link UIMATypeAwareAnalyzer} */ @@ -51,7 +49,7 @@ public void baseUIMATypeAwareAnalyzerStreamTest() throws Exception { // create a token stream - TokenStream ts = analyzer.tokenStream("text", new StringReader("the big brown fox jumped on the wood")); + TokenStream ts = analyzer.tokenStream("text", "the big brown fox jumped on the wood"); // check that 'the big brown fox jumped on the wood' tokens have the expected PoS types assertTokenStreamContents(ts, Index: lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 1500768) +++ lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -21,7 +21,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStreamReader; -import java.io.StringReader; import java.text.Collator; import java.util.List; import java.util.Locale; @@ -979,8 +978,8 @@ private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) throws Exception { - TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text)); - TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text)); + TokenStream ts1 = a1.tokenStream("bogus", text); + TokenStream ts2 = a2.tokenStream("bogus", text); ts1.reset(); ts2.reset(); TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class); @@ -1030,7 +1029,7 @@ Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig ("shingle-analyzer", "StandardTokenizer,ShingleFilter")); benchmark.getRunData().getAnalyzer().tokenStream - ("bogus", new StringReader(text)).close(); + ("bogus", text).close(); BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text, new String[] { "one", "one two", "two", "two three", "three", "three four", "four", "four five", Index: lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java =================================================================== --- lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (revision 1500768) +++ lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java (working copy) @@ -33,7 +33,6 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; -import java.io.StringReader; import java.util.Collection; import java.util.LinkedList; @@ -86,7 +85,7 @@ private String[] tokenizeDoc(String doc) throws IOException { Collection result = new LinkedList(); - TokenStream tokenStream = analyzer.tokenStream(textFieldName, new StringReader(doc)); + TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { Index: lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java =================================================================== --- lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (revision 1500768) +++ lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (working copy) @@ -140,6 +140,44 @@ } /** + * Returns a TokenStream suitable for fieldName, tokenizing + * the contents of text. + *
<p>
+ * This method uses {@link #createComponents(String, Reader)} to obtain an + * instance of {@link TokenStreamComponents}. It returns the sink of the + * components and stores the components internally. Subsequent calls to this + * method will reuse the previously stored components after resetting them + * through {@link TokenStreamComponents#setReader(Reader)}. + *
<p>
+ * NOTE: After calling this method, the consumer must follow the + * workflow described in {@link TokenStream} to properly consume its contents. + * See the {@link org.apache.lucene.analysis Analysis package documentation} for + * some examples demonstrating this. + * + * @param fieldName the name of the field the created TokenStream is used for + * @param text the String the streams source reads from + * @return TokenStream for iterating the analyzed content of reader + * @throws AlreadyClosedException if the Analyzer is closed. + * @throws IOException if an i/o error occurs (may rarely happen for Strings, but can still happen). + */ + public final TokenStream tokenStream(final String fieldName, final String text) throws IOException { + TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName); + @SuppressWarnings("resource") final ReusableStringReader strReader = + (components == null || components.reusableStringReader == null) ? + new ReusableStringReader() : components.reusableStringReader; + strReader.setValue(text); + final Reader r = initReader(fieldName, strReader); + if (components == null) { + components = createComponents(fieldName, r); + reuseStrategy.setReusableComponents(fieldName, components); + } else { + components.setReader(r); + } + components.reusableStringReader = strReader; + return components.getTokenStream(); + } + + /** * Override this if you want to add a CharFilter chain. *
<p>
* The default implementation returns reader @@ -208,6 +246,9 @@ * the chain. This can be the source if there are no filters. */ protected final TokenStream sink; + + /** a cache when used with String instead of Reader */ + transient ReusableStringReader reusableStringReader; /** * Creates a new {@link TokenStreamComponents} instance. Index: lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java (working copy) @@ -0,0 +1,61 @@ +package org.apache.lucene.analysis; + +import java.io.Reader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Internal class to enable reuse of the string reader by {@link Analyzer#tokenStream(String,String)} */ +final class ReusableStringReader extends Reader { + private int pos = 0, size = 0; + private String s = null; + + void setValue(String s) { + this.s = s; + this.size = s.length(); + this.pos = 0; + } + + @Override + public int read() { + if (pos < size) { + return s.charAt(pos++); + } else { + s = null; + return -1; + } + } + + @Override + public int read(char[] c, int off, int len) { + if (pos < size) { + len = Math.min(len, size-pos); + s.getChars(pos, pos+len, c, off); + pos += len; + return len; + } else { + s = null; + return -1; + } + } + + @Override + public void close() { + pos = size; // this prevents NPE when reading after close! 
+ s = null; + } +} Index: lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java =================================================================== --- lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java (revision 0) +++ lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java (working copy) Property changes on: lucene/core/src/java/org/apache/lucene/analysis/ReusableStringReader.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/java/org/apache/lucene/document/Field.java =================================================================== --- lucene/core/src/java/org/apache/lucene/document/Field.java (revision 1500768) +++ lucene/core/src/java/org/apache/lucene/document/Field.java (working copy) @@ -75,7 +75,6 @@ protected TokenStream tokenStream; private transient TokenStream internalTokenStream; - private transient ReusableStringReader internalReader; /** * Field's boost @@ -552,56 +551,12 @@ } else if (readerValue() != null) { return analyzer.tokenStream(name(), readerValue()); } else if (stringValue() != null) { - if (internalReader == null) { - internalReader = new ReusableStringReader(); - } - internalReader.setValue(stringValue()); - return analyzer.tokenStream(name(), internalReader); + return analyzer.tokenStream(name(), stringValue()); } throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value"); } - static final class ReusableStringReader extends Reader { - private int pos = 0, size = 0; - private String s = null; - - void setValue(String s) { - this.s = s; - this.size = s.length(); - this.pos = 0; - } - - @Override - public int read() { - if (pos < size) { - return s.charAt(pos++); - } else { - s = null; - return -1; - } - } - - @Override - public int read(char[] c, int off, int len) { - if (pos < size) { - len = Math.min(len, size-pos); - s.getChars(pos, pos+len, c, off); - pos += len; - return len; - } else { - s = null; - return -1; - } - } - - @Override - public void close() { - pos = size; // this prevents NPE when reading after close! 
- s = null; - } - } - static final class StringTokenStream extends TokenStream { private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); Index: lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java =================================================================== --- lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (revision 1500768) +++ lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (working copy) @@ -96,7 +96,7 @@ String testString = "t"; Analyzer analyzer = new MockAnalyzer(random()); - TokenStream stream = analyzer.tokenStream("dummy", new StringReader(testString)); + TokenStream stream = analyzer.tokenStream("dummy", testString); stream.reset(); while (stream.incrementToken()) { // consume Index: lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java =================================================================== --- lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java (working copy) @@ -0,0 +1,61 @@ +package org.apache.lucene.analysis; + +import java.nio.CharBuffer; + +import org.apache.lucene.util.LuceneTestCase; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +public class TestReusableStringReader extends LuceneTestCase { + + public void test() throws Exception { + ReusableStringReader reader = new ReusableStringReader(); + assertEquals(-1, reader.read()); + assertEquals(-1, reader.read(new char[1])); + assertEquals(-1, reader.read(new char[2], 1, 1)); + assertEquals(-1, reader.read(CharBuffer.wrap(new char[2]))); + + reader.setValue("foobar"); + char[] buf = new char[4]; + assertEquals(4, reader.read(buf)); + assertEquals("foob", new String(buf)); + assertEquals(2, reader.read(buf)); + assertEquals("ar", new String(buf, 0, 2)); + assertEquals(-1, reader.read(buf)); + reader.close(); + + reader.setValue("foobar"); + assertEquals(0, reader.read(buf, 1, 0)); + assertEquals(3, reader.read(buf, 1, 3)); + assertEquals("foo", new String(buf, 1, 3)); + assertEquals(2, reader.read(CharBuffer.wrap(buf, 2, 2))); + assertEquals("ba", new String(buf, 2, 2)); + assertEquals('r', (char) reader.read()); + assertEquals(-1, reader.read(buf)); + reader.close(); + + reader.setValue("foobar"); + StringBuilder sb = new StringBuilder(); + int ch; + while ((ch = reader.read()) != -1) { + sb.append((char) ch); + } + reader.close(); + assertEquals("foobar", sb.toString()); + } + +} Index: lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java =================================================================== --- lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java (revision 0) +++ lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java (working copy) Property changes on: lucene/core/src/test/org/apache/lucene/analysis/TestReusableStringReader.java ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +Date Author Id Revision HeadURL \ No newline at end of property Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/core/src/test/org/apache/lucene/document/TestField.java =================================================================== --- lucene/core/src/test/org/apache/lucene/document/TestField.java (revision 1500768) +++ lucene/core/src/test/org/apache/lucene/document/TestField.java (working copy) @@ -18,11 +18,8 @@ */ import java.io.StringReader; -import java.nio.CharBuffer; - import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.Token; -import org.apache.lucene.document.Field.ReusableStringReader; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; @@ -518,39 +515,4 @@ } } - public void testReusableStringReader() throws Exception { - ReusableStringReader reader = new ReusableStringReader(); - assertEquals(-1, reader.read()); - assertEquals(-1, reader.read(new char[1])); - assertEquals(-1, reader.read(new char[2], 1, 1)); - assertEquals(-1, reader.read(CharBuffer.wrap(new char[2]))); - - reader.setValue("foobar"); - char[] buf = new char[4]; - assertEquals(4, reader.read(buf)); - assertEquals("foob", new String(buf)); - assertEquals(2, reader.read(buf)); - assertEquals("ar", new String(buf, 0, 2)); - assertEquals(-1, reader.read(buf)); - reader.close(); - - reader.setValue("foobar"); - assertEquals(0, reader.read(buf, 1, 0)); - assertEquals(3, reader.read(buf, 1, 3)); - assertEquals("foo", new String(buf, 1, 3)); - assertEquals(2, reader.read(CharBuffer.wrap(buf, 2, 2))); - assertEquals("ba", new String(buf, 2, 2)); - assertEquals('r', (char) reader.read()); - assertEquals(-1, reader.read(buf)); - reader.close(); - - 
reader.setValue("foobar"); - StringBuilder sb = new StringBuilder(); - int ch; - while ((ch = reader.read()) != -1) { - sb.append((char) ch); - } - reader.close(); - assertEquals("foobar", sb.toString()); - } } Index: lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java (revision 1500768) +++ lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java (working copy) @@ -18,8 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.TokenStream; @@ -49,7 +47,7 @@ if (other != null && s.equals(other)) { continue; } - final TokenStream ts = a.tokenStream("foo", new StringReader(s)); + final TokenStream ts = a.tokenStream("foo", s); final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); final BytesRef termBytes = termAtt.getBytesRef(); ts.reset(); Index: lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java =================================================================== --- lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java (revision 1500768) +++ lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java (working copy) @@ -18,8 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.MockAnalyzer; @@ -176,7 +174,7 @@ Analyzer analyzer = new MockAnalyzer(random()); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd ")); + TokenStream stream = analyzer.tokenStream("field", "abcd "); stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct? 
stream = new CachingTokenFilter(stream); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); Index: lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java =================================================================== --- lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (revision 1500768) +++ lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (working copy) @@ -617,7 +617,7 @@ break; } } - TokenStream ts = analyzer.tokenStream("ignore", new StringReader(term)); + TokenStream ts = analyzer.tokenStream("ignore", term); CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); ts.reset(); while(ts.incrementToken()) { Index: lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java =================================================================== --- lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (revision 1500768) +++ lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java (working copy) @@ -17,7 +17,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.Iterator; @@ -25,7 +24,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.util.PriorityQueue; /** @@ -78,7 +76,7 @@ public final String getBestFragment(Analyzer analyzer, String fieldName,String text) throws IOException, InvalidTokenOffsetsException { - TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(fieldName, text); return getBestFragment(tokenStream, text); } @@ -130,7 +128,7 @@ int maxNumFragments) throws IOException, InvalidTokenOffsetsException { - TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(fieldName, text); return getBestFragments(tokenStream, text, maxNumFragments); } Index: lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java =================================================================== --- lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (revision 1500768) +++ lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (working copy) @@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef; import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.Comparator; @@ -314,7 +313,7 @@ public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer) { try { - return analyzer.tokenStream(field, new StringReader(contents)); + return analyzer.tokenStream(field, contents); } catch (IOException ex) { throw new RuntimeException(ex); } Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (revision 1500768) +++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. 
*/ import java.io.IOException; -import java.io.StringReader; import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; @@ -89,8 +88,7 @@ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, - true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, - new StringReader(text)); + true, MockTokenFilter.ENGLISH_STOPSET).tokenStream(fieldName, text); // Assuming "", "" used to highlight SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME); Index: lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (revision 1500768) +++ lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (working copy) @@ -20,7 +20,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -248,7 +247,7 @@ private String highlightField(Query query, String fieldName, String text) throws IOException, InvalidTokenOffsetsException { TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET) - .tokenStream(fieldName, new StringReader(text)); + .tokenStream(fieldName, text); // Assuming "", "" used to highlight SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME); @@ -269,8 +268,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, - new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, @@ -348,7 +346,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -377,7 +375,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -406,7 +404,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -431,7 +429,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new 
SimpleFragmenter(40)); @@ -455,7 +453,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -479,7 +477,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -505,7 +503,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).getField(NUMERIC_FIELD_NAME).numericValue().toString(); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -533,7 +531,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); @@ -555,7 +553,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); @@ -585,7 +583,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5)); @@ -608,7 +606,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20)); @@ -639,7 +637,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -710,7 +708,7 @@ int maxNumFragmentsRequired = 2; for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "..."); @@ -907,7 +905,7 @@ int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME); - TokenStream tokenStream = 
analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text); Highlighter highlighter = new Highlighter(this, scorer); @@ -931,7 +929,7 @@ int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = new QueryScorer(query, null); - TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text); Highlighter highlighter = new Highlighter(this, scorer); @@ -955,7 +953,7 @@ int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text); Highlighter highlighter = new Highlighter(this, scorer); @@ -1126,7 +1124,7 @@ numHighlights = 0; for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this); @@ -1187,7 +1185,7 @@ Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new // Highlighter(new // QueryTermScorer(wTerms)); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]); highlighter.setTextFragmenter(new SimpleFragmenter(2)); String result = highlighter.getBestFragment(tokenStream, texts[0]).trim(); @@ -1196,7 +1194,7 @@ // readjust weights wTerms[1].setWeight(50f); - tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); + tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]); highlighter = getHighlighter(wTerms, HighlighterTest.this); highlighter.setTextFragmenter(new SimpleFragmenter(2)); @@ -1232,7 +1230,7 @@ Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this); // Get 3 best fragments and separate with a "..." 
- TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s)); + TokenStream tokenStream = analyzer.tokenStream(null, s); String result = highlighter.getBestFragments(tokenStream, s, 3, "..."); String expectedResult = "football-soccer in the euro 2004 footie competition"; @@ -1257,7 +1255,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this); String result = highlighter.getBestFragment(tokenStream, text); @@ -1280,7 +1278,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);// new Highlighter(this, new @@ -1288,7 +1286,7 @@ highlighter.setTextFragmenter(new SimpleFragmenter(20)); String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10); - tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + tokenStream = analyzer.tokenStream(FIELD_NAME, text); TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text, true, 10); @@ -1318,7 +1316,7 @@ public void run() throws Exception { numHighlights = 0; doSearching(new TermQuery(new Term(FIELD_NAME, "meat"))); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0])); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, texts[0]); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this);// new Highlighter(this, new // QueryTermScorer(query)); @@ -1432,7 +1430,7 @@ for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false); highlighter.setTextFragmenter(new SimpleFragmenter(40)); @@ -1461,7 +1459,7 @@ doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults"))); for (String text : texts) { - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this); String result = highlighter.getBestFragment(tokenStream, text); @@ -1503,7 +1501,7 @@ } }); highlighter.setTextFragmenter(new SimpleFragmenter(2000)); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, rawDocContent); String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, ""); // An ugly bit of XML creation: @@ -1828,7 +1826,7 @@ * * for (int i = 0; i < hits.totalHits; i++) { String text = * searcher.doc2(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream - * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text)); + * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,text); * String highlightedText = 
highlighter.getBestFragment(tokenStream,text); * System.out.println(highlightedText); } } */ @@ -1855,7 +1853,7 @@ final int expectedHighlights) throws Exception { for (int i = 0; i < hits.totalHits; i++) { String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); - TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text); QueryScorer scorer = new QueryScorer(query, FIELD_NAME); Highlighter highlighter = new Highlighter(this, scorer); @@ -2104,7 +2102,7 @@ int maxNumFragmentsRequired = 2; String fragmentSeparator = "..."; Scorer scorer = null; - TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text); if (mode == QUERY) { scorer = new QueryScorer(query); } else if (mode == QUERY_TERM) { Index: lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java =================================================================== --- lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (revision 1500768) +++ lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.Reader; -import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -171,7 +170,7 @@ protected List analyze(String text, String field, Analyzer analyzer) throws IOException { List bytesRefs = new ArrayList(); - TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text)); + TokenStream tokenStream = analyzer.tokenStream(field, text); TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class); BytesRef bytesRef = termAttribute.getBytesRef(); Index: lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1500768) +++ lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; @@ -291,7 +290,7 @@ TokenStream stream; try { - stream = analyzer.tokenStream(fieldName, new StringReader(text)); + stream = analyzer.tokenStream(fieldName, text); } catch (IOException ex) { throw new RuntimeException(ex); } Index: lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java =================================================================== --- lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (revision 1500768) +++ lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (working copy) @@ -22,7 +22,6 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; -import java.io.StringReader; import java.util.HashSet; import java.util.Set; @@ -378,7 +377,7 @@ SpanQuery wrappedquery = new SpanMultiTermQueryWrapper(regex); MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024); - mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there"))); + mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there")); // 
This throws an NPE assertEquals(0, mindex.search(wrappedquery), 0.00001f); @@ -390,7 +389,7 @@ SpanQuery wrappedquery = new SpanOrQuery(new SpanMultiTermQueryWrapper(regex)); MemoryIndex mindex = new MemoryIndex(random().nextBoolean(), random().nextInt(50) * 1024 * 1024); - mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", new StringReader("hello there"))); + mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there")); // This passes though assertEquals(0, mindex.search(wrappedquery), 0.00001f); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -165,7 +164,7 @@ String analyzed = null; TokenStream stream = null; try{ - stream = getAnalyzer().tokenStream(field, new StringReader(chunk)); + stream = getAnalyzer().tokenStream(field, chunk); stream.reset(); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); // get first and hopefully only output token Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java (working copy) @@ -500,7 +500,7 @@ TokenStream source; try { - source = analyzer.tokenStream(field, new StringReader(queryText)); + source = analyzer.tokenStream(field, queryText); source.reset(); } catch (IOException e) { ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text"); @@ -844,7 +844,7 @@ if (analyzerIn == null) analyzerIn = analyzer; try { - source = analyzerIn.tokenStream(field, new StringReader(part)); + source = analyzerIn.tokenStream(field, part); source.reset(); } catch (IOException e) { throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -116,7 +115,7 @@ TokenStream source; try { - source = this.analyzer.tokenStream(field, new StringReader(text)); + source = this.analyzer.tokenStream(field, text); source.reset(); } catch (IOException e1) { throw new RuntimeException(e1); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java =================================================================== --- 
lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java (working copy) @@ -4,7 +4,6 @@ package org.apache.lucene.queryparser.xml.builders; import java.io.IOException; -import java.io.StringReader; import java.util.HashSet; import java.util.Set; @@ -75,7 +74,7 @@ stopWordsSet = new HashSet(); for (String field : fields) { try { - TokenStream ts = analyzer.tokenStream(field, new StringReader(stopWords)); + TokenStream ts = analyzer.tokenStream(field, stopWords); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java (working copy) @@ -13,9 +13,9 @@ import org.w3c.dom.Element; import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.List; + /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -51,7 +51,7 @@ try { List clausesList = new ArrayList(); - TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(value)); + TokenStream ts = analyzer.tokenStream(fieldName, value); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); ts.reset(); Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java (working copy) @@ -3,7 +3,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; import org.apache.lucene.queries.TermsFilter; import org.apache.lucene.util.BytesRef; @@ -13,7 +12,6 @@ import org.w3c.dom.Element; import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.List; @@ -57,9 +55,8 @@ String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); try { - TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); + TokenStream ts = analyzer.tokenStream(fieldName, text); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - Term term = null; BytesRef bytes = termAtt.getBytesRef(); ts.reset(); while (ts.incrementToken()) { Index: lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java =================================================================== --- lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java (revision 1500768) +++ lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java (working copy) @@ -15,7 +15,6 @@ import org.w3c.dom.Element; import 
java.io.IOException; -import java.io.StringReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -53,7 +52,7 @@ BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false)); bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0)); try { - TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); + TokenStream ts = analyzer.tokenStream(fieldName, text); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); Term term = null; BytesRef bytes = termAtt.getBytesRef(); Index: lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java =================================================================== --- lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (revision 1500768) +++ lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -194,7 +193,7 @@ private void addTerms(IndexReader reader, FieldVals f) throws IOException { if (f.queryString == null) return; - TokenStream ts = analyzer.tokenStream(f.fieldName, new StringReader(f.queryString)); + TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); int corpusNumDocs = reader.numDocs(); Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (revision 1500768) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (working copy) @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -856,7 +855,7 @@ final Set toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { // Analyze surface form: - TokenStream ts = indexAnalyzer.tokenStream("", new StringReader(surfaceForm.utf8ToString())); + TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString()); // Create corresponding automaton: labels are bytes // from each analyzed token, with byte 0 used as @@ -881,7 +880,7 @@ final Automaton toLookupAutomaton(final CharSequence key) throws IOException { // TODO: is there a Reader from a CharSequence? 
// Turn tokenstream into automaton: - TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString())); + TokenStream ts = queryAnalyzer.tokenStream("", key.toString()); Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts); ts.close(); Index: lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 1500768) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working copy) @@ -295,15 +295,15 @@ } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { - assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); + assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException { - assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length()); + assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length()); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException { - assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect); + assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect); } public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException { @@ -332,7 +332,7 @@ public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException { - assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); + assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, null, input.length()); } public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException { @@ -891,7 +891,7 @@ protected String toDot(Analyzer a, String inputText) throws IOException { final StringWriter sw = new StringWriter(); - final TokenStream ts = a.tokenStream("field", new StringReader(inputText)); + final TokenStream ts = a.tokenStream("field", inputText); ts.reset(); new TokenStreamToDot(inputText, ts, new PrintWriter(sw)).toDot(); return sw.toString(); @@ -899,7 +899,7 @@ protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException { Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), "UTF-8"); - final TokenStream ts = a.tokenStream("field", new 
StringReader(inputText)); + final TokenStream ts = a.tokenStream("field", inputText); ts.reset(); new TokenStreamToDot(inputText, ts, new PrintWriter(w)).toDot(); w.close(); Index: lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java =================================================================== --- lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java (revision 1500768) +++ lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; -import java.io.StringReader; import java.util.HashMap; import java.util.Map; @@ -33,7 +32,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; @@ -199,13 +197,13 @@ doc.add(new Field("tracer", sortData[i][0], customType)); doc.add(new TextField("contents", sortData[i][1], Field.Store.NO)); if (sortData[i][2] != null) - doc.add(new TextField("US", usAnalyzer.tokenStream("US", new StringReader(sortData[i][2])))); + doc.add(new TextField("US", usAnalyzer.tokenStream("US", sortData[i][2]))); if (sortData[i][3] != null) - doc.add(new TextField("France", franceAnalyzer.tokenStream("France", new StringReader(sortData[i][3])))); + doc.add(new TextField("France", franceAnalyzer.tokenStream("France", sortData[i][3]))); if (sortData[i][4] != null) - doc.add(new TextField("Sweden", swedenAnalyzer.tokenStream("Sweden", new StringReader(sortData[i][4])))); + doc.add(new TextField("Sweden", swedenAnalyzer.tokenStream("Sweden", sortData[i][4]))); if (sortData[i][5] != null) - doc.add(new TextField("Denmark", denmarkAnalyzer.tokenStream("Denmark", new StringReader(sortData[i][5])))); + doc.add(new TextField("Denmark", denmarkAnalyzer.tokenStream("Denmark", sortData[i][5]))); writer.addDocument(doc); } writer.forceMerge(1); @@ -260,7 +258,7 @@ for (int i = 0; i < numTestPoints; i++) { String term = _TestUtil.randomSimpleString(random()); - TokenStream ts = analyzer.tokenStream("fake", new StringReader(term)); + TokenStream ts = analyzer.tokenStream("fake", term); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); ts.reset(); @@ -279,7 +277,7 @@ for (Map.Entry mapping : map.entrySet()) { String term = mapping.getKey(); BytesRef expected = mapping.getValue(); - TokenStream ts = analyzer.tokenStream("fake", new StringReader(term)); + TokenStream ts = analyzer.tokenStream("fake", term); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); ts.reset(); Index: solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java =================================================================== --- solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java (revision 1500768) +++ solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.util.Map; import org.apache.commons.io.IOUtils; @@ -27,8 +26,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import 
org.apache.lucene.collation.ICUCollationKeyAnalyzer; -import org.apache.lucene.index.GeneralField; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StorableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; @@ -240,7 +237,7 @@ TokenStream source; try { - source = analyzer.tokenStream(field, new StringReader(part)); + source = analyzer.tokenStream(field, part); source.reset(); } catch (IOException e) { throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e); Index: solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (working copy) @@ -86,7 +86,7 @@ TokenStream tokenStream = null; try { - tokenStream = analyzer.tokenStream(context.getFieldName(), new StringReader(value)); + tokenStream = analyzer.tokenStream(context.getFieldName(), value); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } @@ -140,7 +140,7 @@ protected Set getQueryTokenSet(String query, Analyzer analyzer) { try { final Set tokens = new HashSet(); - final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query)); + final TokenStream tokenStream = analyzer.tokenStream("", query); final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); final BytesRef bytes = bytesAtt.getBytesRef(); Index: solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java (working copy) @@ -72,7 +72,6 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -344,7 +343,7 @@ return query; } StringBuilder norm = new StringBuilder(); - TokenStream tokens = analyzer.tokenStream("", new StringReader(query)); + TokenStream tokens = analyzer.tokenStream("", query); tokens.reset(); CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); Index: solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java =================================================================== --- solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (working copy) @@ -18,7 +18,6 @@ package org.apache.solr.handler.component; import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -464,7 +463,7 @@ private Collection getTokens(String q, Analyzer analyzer) throws IOException { Collection result = new ArrayList(); assert analyzer != null; - TokenStream ts = analyzer.tokenStream("", new StringReader(q)); + TokenStream ts = analyzer.tokenStream("", q); ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); Index: solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java 
=================================================================== --- solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (working copy) @@ -48,7 +48,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.StringReader; import java.util.*; /** @@ -636,7 +635,7 @@ private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException { TokenStream tstream; - TokenStream ts = schema.getAnalyzer().tokenStream(fieldName, new StringReader(docText)); + TokenStream ts = schema.getAnalyzer().tokenStream(fieldName, docText); ts.reset(); tstream = new TokenOrderingFilter(ts, 10); return tstream; Index: solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java =================================================================== --- solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java (working copy) @@ -405,7 +405,7 @@ TokenStream source; try { - source = analyzer.tokenStream(field, new StringReader(queryText)); + source = analyzer.tokenStream(field, queryText); source.reset(); } catch (IOException e) { throw new SyntaxError("Unable to initialize TokenStream to analyze query text", e); Index: solr/core/src/java/org/apache/solr/schema/CollationField.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/CollationField.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/schema/CollationField.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.text.Collator; import java.text.ParseException; import java.text.RuleBasedCollator; @@ -31,8 +30,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.collation.CollationKeyAnalyzer; -import org.apache.lucene.index.GeneralField; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.StorableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; @@ -216,7 +213,7 @@ TokenStream source; try { - source = analyzer.tokenStream(field, new StringReader(part)); + source = analyzer.tokenStream(field, part); source.reset(); } catch (IOException e) { throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e); Index: solr/core/src/java/org/apache/solr/schema/TextField.java =================================================================== --- solr/core/src/java/org/apache/solr/schema/TextField.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/schema/TextField.java (working copy) @@ -35,7 +35,6 @@ import java.util.List; import java.util.ArrayList; import java.io.IOException; -import java.io.StringReader; /** TextField is the basic type for configurable text analysis. * Analyzers for field types using this implementation should be defined in the schema. 
@@ -141,7 +140,7 @@ TokenStream source; try { - source = analyzerIn.tokenStream(field, new StringReader(part)); + source = analyzerIn.tokenStream(field, part); source.reset(); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to initialize TokenStream to analyze multiTerm term: " + part, e); @@ -181,7 +180,7 @@ TokenStream source; try { - source = analyzer.tokenStream(field, new StringReader(queryText)); + source = analyzer.tokenStream(field, queryText); source.reset(); } catch (IOException e) { throw new RuntimeException("Unable to initialize TokenStream to analyze query text", e); Index: solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java =================================================================== --- solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (working copy) @@ -18,8 +18,6 @@ package org.apache.solr.spelling; import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -28,11 +26,10 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; @@ -160,7 +157,7 @@ flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG; } try { - analyze(result, new StringReader(word), startIndex, flagValue); + analyze(result, word, startIndex, flagValue); } catch (IOException e) { // TODO: shouldn't we log something? 
} @@ -174,7 +171,7 @@ return result; } - protected void analyze(Collection result, Reader text, int offset, int flagsAttValue) throws IOException { + protected void analyze(Collection result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); Index: solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java =================================================================== --- solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java (revision 1500768) +++ solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -38,7 +37,7 @@ Collection result = new ArrayList(); try { - analyze(result, new StringReader(original), 0, 0); + analyze(result, original, 0, 0); } catch (IOException e) { throw new RuntimeException(e); } Index: solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (revision 1500768) +++ solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (working copy) @@ -86,7 +86,7 @@ String text = "one two three si\uD834\uDD1Ex"; // field one - TokenStream input = a.tokenStream("one", new StringReader(text)); + TokenStream input = a.tokenStream("one", text); assertTokenStreamContents(input, new String[] { "\u0001eno", "one", "\u0001owt", "two", "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" }, @@ -95,7 +95,7 @@ new int[] { 1, 0, 1, 0, 1, 0, 1, 0 } ); // field two - input = a.tokenStream("two", new StringReader(text)); + input = a.tokenStream("two", text); assertTokenStreamContents(input, new String[] { "\u0001eno", "\u0001owt", "\u0001eerht", "\u0001x\uD834\uDD1Eis" }, @@ -104,7 +104,7 @@ new int[] { 1, 1, 1, 1 } ); // field three - input = a.tokenStream("three", new StringReader(text)); + input = a.tokenStream("three", text); assertTokenStreamContents(input, new String[] { "one", "two", "three", "si\uD834\uDD1Ex" }, new int[] { 0, 4, 8, 14 }, Index: solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java =================================================================== --- solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (revision 1500768) +++ solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (working copy) @@ -30,7 +30,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.io.StringReader; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -175,7 +174,7 @@ public void testTermOffsetsTokenStream() throws Exception { String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" }; Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); - TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n")); + TokenStream tokenStream = a1.tokenStream("", "a b c d e f g h i j k l m n"); tokenStream.reset(); TermOffsetsTokenStream tots = new TermOffsetsTokenStream( @@ -183,7 +182,7 @@ for( String v : multivalued ){ TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() ); Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); - TokenStream ts2 = 
a2.tokenStream("", new StringReader(v)); + TokenStream ts2 = a2.tokenStream("", v); ts2.reset(); while (ts1.incrementToken()) { Index: solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java =================================================================== --- solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java (revision 1500768) +++ solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java (working copy) @@ -29,7 +29,6 @@ import java.util.Collection; import java.util.HashSet; -import java.io.StringReader; import java.io.IOException; @@ -44,7 +43,7 @@ try { Collection result = new HashSet(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); - TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery)); + TokenStream ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); Index: solr/core/src/test/org/apache/solr/TestTrie.java =================================================================== --- solr/core/src/test/org/apache/solr/TestTrie.java (revision 1500768) +++ solr/core/src/test/org/apache/solr/TestTrie.java (working copy) @@ -27,7 +27,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.Locale; import java.util.TimeZone; @@ -57,7 +56,7 @@ assertTrue(type instanceof TrieField); String value = String.valueOf(random().nextInt()); - TokenStream ts = type.getAnalyzer().tokenStream("dummy", new StringReader(value)); + TokenStream ts = type.getAnalyzer().tokenStream("dummy", value); OffsetAttribute ofsAtt = ts.addAttribute(OffsetAttribute.class); ts.reset(); int count = 0; @@ -74,7 +73,7 @@ ts.close(); // Test empty one: - ts = type.getAnalyzer().tokenStream("dummy", new StringReader("")); + ts = type.getAnalyzer().tokenStream("dummy", ""); ts.reset(); assertFalse(ts.incrementToken()); ts.end();
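Nearly every hunk above makes the same substitution: the caller stops wrapping its text in a java.io.StringReader and passes the String directly to Analyzer.tokenStream(String, String), so the analyzer takes care of the Reader handling; the remaining hunks only drop the now-unused imports. A minimal stand-alone sketch of the before/after call pattern, assuming a Lucene 4.x analyzer such as the WhitespaceAnalyzer already used in the tests above (illustrative only, not a hunk of this patch; the class, field, and sample text are placeholders):

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class TokenStreamStringCall {
      public static void main(String[] args) throws Exception {
        WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);

        // Before this patch a caller had to supply a Reader:
        //   TokenStream ts = analyzer.tokenStream("field", new StringReader("quick brown fox"));
        // After this patch the String overload is used instead:
        TokenStream ts = analyzer.tokenStream("field", "quick brown fox");

        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();                      // consumption contract is unchanged by the patch
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
      }
    }

The reset/incrementToken/end/close workflow seen throughout the touched files stays exactly as it was; only the way the text reaches the analyzer changes.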