Index: modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java =================================================================== --- modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (revision 1095935) +++ modules/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java (working copy) @@ -195,4 +195,9 @@ while (stream.incrementToken()) { } } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SmartChineseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "undersøgelse", "undersøgelse"); checkOneTermReuse(a, "undersøg", "undersøg"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new DanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java (working copy) @@ -219,4 +219,9 @@ assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick", "brown", "fox" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception 
{ + checkRandomData(random, new PersianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java (working copy) @@ -75,4 +75,9 @@ Analyzer a = new BulgarianAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(a, "строевете строеве", new String[] { "строй", "строеве" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new BulgarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (working copy) @@ -3,6 +3,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.util.ReusableAnalyzerBase; @@ -219,4 +220,9 @@ new String[] { "仮", "名", "遣", "い", "カタカナ" }, new String[] { "", "", "", "", "" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new StandardAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: 
modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (working copy) @@ -210,6 +210,13 @@ assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "\ud801\udc1ctest" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new SimpleAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + checkRandomData(random, new StopAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } final class PayloadSetter extends TokenFilter { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (working copy) @@ -309,4 +309,9 @@ dir.close(); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (working copy) @@ -102,4 +102,9 @@ assertEquals(0, 
offsetAtt.startOffset()); assertEquals(4, offsetAtt.endOffset()); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new KeywordAnalyzer(), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (working copy) @@ -418,4 +418,9 @@ new String[] { "仮", "名", "遣", "い", "カタカナ" }, new String[] { "", "", "", "", "" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (working copy) @@ -63,4 +63,9 @@ checkOneTermReuse(a, "Schaltflächen", "schaltflach"); checkOneTermReuse(a, "Schaltflaechen", "schaltflaech"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GermanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (revision 1095935) +++ 
modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java (working copy) @@ -57,4 +57,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("deminimaltestdata.zip"), "deminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; import org.apache.lucene.analysis.util.ReusableAnalyzerBase; import static org.apache.lucene.analysis.util.VocabularyAssert.*; @@ -45,4 +46,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("delighttestdata.zip"), "delight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer; 
import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.util.ReusableAnalyzerBase; +import org.junit.Ignore; import static org.apache.lucene.analysis.util.VocabularyAssert.*; @@ -36,20 +37,25 @@ * */ public class TestGermanStemFilter extends BaseTokenStreamTestCase { + Analyzer analyzer = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer t = new KeywordTokenizer(reader); + return new TokenStreamComponents(t, + new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); + } + }; - public void testStemming() throws Exception { - Analyzer analyzer = new ReusableAnalyzerBase() { - @Override - protected TokenStreamComponents createComponents(String fieldName, - Reader reader) { - Tokenizer t = new KeywordTokenizer(reader); - return new TokenStreamComponents(t, - new GermanStemFilter(new LowerCaseFilter(TEST_VERSION_CURRENT, t))); - } - }; - + public void testStemming() throws Exception { InputStream vocOut = getClass().getResourceAsStream("data.txt"); assertVocabulary(analyzer, vocOut); vocOut.close(); } + + /** blast some random strings through the analyzer */ + @Ignore("bugs!") + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.hu.HungarianAnalyzer; public class TestSwedishAnalyzer extends BaseTokenStreamTestCase { /** This test fails with 
NPE when the @@ -50,4 +51,9 @@ checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne"); checkOneTermReuse(a, "jaktkarlens", "jaktkarl"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SwedishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java (working copy) @@ -45,4 +45,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("svlighttestdata.zip"), "svlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "edeltäjiinsä", "edeltäj"); checkOneTermReuse(a, "edeltäjistään", "edeltäjistään"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new FinnishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java =================================================================== --- 
modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java (working copy) @@ -45,4 +45,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("filighttestdata.zip"), "filight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java (working copy) @@ -47,4 +47,9 @@ HindiAnalyzer.getDefaultStopSet(), exclusionSet); checkOneTermReuse(a, "हिंदी", "हिंदी"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new HindiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (working copy) @@ -157,4 +157,8 @@ checkOneTermReuse(a, input, expected); } + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new BrazilianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } \ No newline at end of file Index: 
modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (working copy) @@ -260,4 +260,9 @@ FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31); assertAnalyzesTo(a, "Votre", new String[] { }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new FrenchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java (working copy) @@ -59,4 +59,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("frminimaltestdata.zip"), "frminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java (working copy) @@ -159,4 +159,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("frlighttestdata.zip"), "frlight.txt"); } + 
+ /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (working copy) @@ -185,4 +185,9 @@ checkOneTerm(new DutchAnalyzer(TEST_VERSION_CURRENT), input, expected); } + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new DutchAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } + } \ No newline at end of file Index: modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java (working copy) @@ -142,5 +142,10 @@ analyzer, "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com", new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" }); - } + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ThaiAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (working 
copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "havnedistriktene", "havnedistriktene"); checkOneTermReuse(a, "havnedistrikter", "havnedistrikt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new NorwegianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "babakocsi", "babakocsi"); checkOneTermReuse(a, "babakocsijáért", "babakocs"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new HungarianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "արծիվներ", "արծիվներ"); checkOneTermReuse(a, "արծիվ", "արծ"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ArmenianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java =================================================================== --- 
modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "absenţa", "absenţa"); checkOneTermReuse(a, "absenţi", "absenţ"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new RomanianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "tirgiem", "tirgiem"); checkOneTermReuse(a, "tirgus", "tirg"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new LatvianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "llengües", "llengües"); checkOneTermReuse(a, "llengua", "llengu"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CatalanAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: 
modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java (working copy) @@ -66,4 +66,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "quilométricas", "quilométricas"); checkOneTermReuse(a, "quilométricos", "quilométr"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new PortugueseAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java (working copy) @@ -66,4 +66,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptminimaltestdata.zip"), 
"ptminimal.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java (working copy) @@ -92,4 +92,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("ptlighttestdata.zip"), "ptlight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "ağacı", "ağacı"); checkOneTermReuse(a, "ağaç", "ağaç"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new TurkishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java 
(working copy) @@ -64,4 +64,9 @@ new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представление" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new RussianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java (working copy) @@ -45,4 +45,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("rulighttestdata.zip"), "rulight.txt"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "peledakan", "peledakan"); checkOneTermReuse(a, "pembunuhan", "bunuh"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new IndonesianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java =================================================================== --- 
modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (working copy) @@ -87,4 +87,9 @@ assertAnalyzesToReuse(a, "ΠΡΟΫΠΟΘΕΣΕΙΣ Άψογος, ο μεστός και οι άλλοι", new String[] { "προυποθεσ", "αψογ", "μεστ", "αλλ" }); } - } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GreekAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } +} Index: modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "correspondente", "correspondente"); checkOneTermReuse(a, "corresponderá", "correspond"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new GalicianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (working copy) @@ -98,4 +98,9 @@ assertAnalyzesTo(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); assertAnalyzesToReuse(a, "كبيرة the quick ساهدهات", new String[] { "كبير","the", "quick", "ساهد" }); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws 
Exception { + checkRandomData(random, new ArabicAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (working copy) @@ -52,4 +52,9 @@ checkOneTermReuse(a, "books", "books"); checkOneTermReuse(a, "book", "book"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new EnglishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java (working copy) @@ -51,4 +51,9 @@ checkOneTerm(analyzer, "congress", "congress"); checkOneTerm(analyzer, "serious", "serious"); } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java (working copy) @@ -36,21 +36,21 @@ /** * Test the PorterStemFilter with Martin Porter's test data. 
*/ -public class TestPorterStemFilter extends BaseTokenStreamTestCase { +public class TestPorterStemFilter extends BaseTokenStreamTestCase { + Analyzer a = new ReusableAnalyzerBase() { + @Override + protected TokenStreamComponents createComponents(String fieldName, + Reader reader) { + Tokenizer t = new KeywordTokenizer(reader); + return new TokenStreamComponents(t, new PorterStemFilter(t)); + } + }; + /** * Run the stemmer against all strings in voc.txt * The output should be the same as the string in output.txt */ public void testPorterStemFilter() throws Exception { - Analyzer a = new ReusableAnalyzerBase() { - @Override - protected TokenStreamComponents createComponents(String fieldName, - Reader reader) { - Tokenizer t = new KeywordTokenizer(reader); - return new TokenStreamComponents(t, new PorterStemFilter(t)); - } - }; - assertVocabulary(a, getDataFile("porterTestData.zip"), "voc.txt", "output.txt"); } @@ -61,4 +61,9 @@ TokenStream filter = new PorterStemFilter(new KeywordMarkerFilter(tokenizer, set)); assertTokenStreamContents(filter, new String[] {"yourselves", "your"}); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, a, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKTokenizer.java (working copy) @@ -270,4 +270,9 @@ newToken("test", 0, 4, CJKTokenizer.SINGLE_TOKEN_TYPE), newToken("あい", 4, 6, CJKTokenizer.DOUBLE_TOKEN_TYPE) }); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CJKAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: 
modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "chicana", "chican"); checkOneTermReuse(a, "chicano", "chicano"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new SpanishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishLightStemFilter.java (working copy) @@ -45,4 +45,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("eslighttestdata.zip"), "eslight.txt"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "zaldiak", "zaldiak"); checkOneTermReuse(a, "mendiari", "mendi"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() 
throws Exception { + checkRandomData(random, new BasqueAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "abbandonata", "abbandonata"); checkOneTermReuse(a, "abbandonati", "abbandon"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new ItalianAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianLightStemFilter.java (working copy) @@ -45,4 +45,9 @@ public void testVocabulary() throws IOException { assertVocabulary(analyzer, getDataFile("itlighttestdata.zip"), "itlight.txt"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, analyzer, 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (revision 1095935) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (working copy) @@ -67,4 +67,9 @@ CzechAnalyzer cz = new 
CzechAnalyzer(TEST_VERSION_CURRENT, CharArraySet.EMPTY_SET, set); assertAnalyzesTo(cz, "hole desek", new String[] {"hole", "desk"}); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new CzechAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java =================================================================== --- modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (revision 1095935) +++ modules/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java (working copy) @@ -50,4 +50,9 @@ checkOneTermReuse(a, "studenta", "studenta"); checkOneTermReuse(a, "studenci", "student"); } + + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new PolishAnalyzer(TEST_VERSION_CURRENT), 10000*RANDOM_MULTIPLIER); + } } Index: lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java =================================================================== --- lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (revision 1096173) +++ lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (working copy) @@ -111,4 +111,8 @@ assertAnalyzesToReuse(analyzer, testString, new String[] { "t" }); } + /** Feeds random strings through the analyzer via checkRandomData (10000*RANDOM_MULTIPLIER iterations) as a sanity check. */ + public void testRandomStrings() throws Exception { + checkRandomData(random, new MockAnalyzer(random), 10000*RANDOM_MULTIPLIER); + } } Index: lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (revision 1096173) +++ lucene/src/test-framework/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (working 
copy)
@@ -19,11 +19,15 @@
 
 import java.io.StringReader;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
 
 import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 
 /**
  * Base class for all Lucene unit tests that use TokenStreams.
@@ -229,4 +233,61 @@
     assertAnalyzesToReuse(a, input, new String[]{expected});
   }
 
+  /**
+   * Blasts an analyzer with random text to make sure its token streams do not do anything crazy,
+   * using a maximum word length of 20.
+   *
+   * @param random source of randomness for the generated text
+   * @param a analyzer under test
+   * @param iterations number of random strings to analyze
+   * @throws IOException propagated from the analyzer's token stream
+   */
+  public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
+    checkRandomData(random, a, iterations, 20);
+  }
+
+  /**
+   * Analyzes {@code iterations} random strings with {@code a}. Each string is tokenized once via
+   * {@code reusableTokenStream} and, when it yields at least one term, the collected terms are
+   * passed to {@code assertAnalyzesToReuse} to verify that reuse reproduces them and to run the
+   * normal token stream sanity checks.
+   *
+   * @param random source of randomness for the generated text
+   * @param a analyzer under test; its token stream must expose a {@code CharTermAttribute}
+   * @param iterations number of random strings to analyze
+   * @param maxWordLength length bound handed to the random unicode string generators
+   * @throws IOException propagated from the analyzer's token stream
+   */
+  public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
+    for (int i = 0; i < iterations; i++) {
+      String text;
+      // NOTE(review): cases 0 and 1 each pick one generator; any other value uses randomUnicodeString.
+      switch(_TestUtil.nextInt(random, 0, 3)) {
+        case 0:
+          text = _TestUtil.randomSimpleString(random);
+          break;
+        case 1:
+          text = _TestUtil.randomRealisticUnicodeString(random, maxWordLength);
+          break;
+        default:
+          text = _TestUtil.randomUnicodeString(random, maxWordLength);
+      }
+
+      TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
+      assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
+      CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
+      // Typed list: a raw List's toArray(...) is typed Object[], which assertAnalyzesToReuse cannot accept.
+      List<String> tokens = new ArrayList<String>();
+      ts.reset();
+      while (ts.incrementToken()) {
+        tokens.add(termAtt.toString());
+        // TODO: we could collect offsets etc here for better checking that reset() really works.
+      }
+      ts.close();
+      // verify reusing is "reproducible" and also get the normal tokenstream sanity checks
+      if (!tokens.isEmpty()) {
+        assertAnalyzesToReuse(a, text, tokens.toArray(new String[tokens.size()]));
+      }
+    }
+  }
 }