Index: lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java =================================================================== --- lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java (revision 944954) +++ lucene/src/test/org/apache/lucene/queryParser/TestMultiAnalyzer.java (working copy) @@ -20,10 +20,9 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -135,9 +134,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } @@ -203,9 +201,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestPosIncrementFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java =================================================================== --- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (revision 944954) +++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (working copy) @@ -27,19 +27,14 @@ import java.util.GregorianCalendar; import java.util.HashSet; import java.util.Locale; -import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; @@ -66,6 +61,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.LocalizedTestCase; +import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; @@ -256,8 +252,8 @@ public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), "türm term term"); - assertQueryEquals("ümlaut", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), "ümlaut"); + assertQueryEquals("türm term term", new MockAnalyzer(), "türm term term"); + assertQueryEquals("ümlaut", 
new MockAnalyzer(), "ümlaut"); assertQueryEquals("\"\"", new KeywordAnalyzer(), ""); assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:"); @@ -304,7 +300,7 @@ assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null, "+(title:dog title:cat) -author:\"bob dole\""); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new StandardAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer()); // make sure OR is the default: assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator()); qp.setDefaultOperator(QueryParser.AND_OPERATOR); @@ -314,7 +310,7 @@ } public void testPunct() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -334,7 +330,7 @@ assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -476,7 +472,7 @@ public void testFarsiRangeCollating() throws Exception { RAMDirectory ramDir = new RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); Document doc = new Document(); doc.add(new Field("content","\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED)); @@ -484,7 +480,7 @@ iw.close(); IndexSearcher is = new IndexSearcher(ramDir, true); - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "content", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in // RuleBasedCollator. 
However, the Arabic Locale seems to order the Farsi @@ -623,7 +619,7 @@ } public void testEscaped() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); /*assertQueryEquals("\\[brackets", a, "\\[brackets"); assertQueryEquals("\\[brackets", null, "brackets"); @@ -717,7 +713,7 @@ } public void testQueryStringEscaping() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); @@ -802,9 +798,8 @@ public void testBoost() throws Exception { - Set stopWords = new HashSet(1); - stopWords.add("on"); - StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords); + CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("on")); + Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true); QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", oneStopAnalyzer); Query q = qp.parse("on^1.0"); assertNotNull(q); @@ -817,7 +812,7 @@ q = qp.parse("\"on\"^1.0"); assertNotNull(q); - QueryParser qp2 = new QueryParser(TEST_VERSION_CURRENT, "field", new StandardAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp2 = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); q = qp2.parse("the^3"); // "the" is a stop word so the result is an empty query: assertNotNull(q); @@ -846,7 +841,7 @@ public void testCustomQueryParserWildcard() { try { - new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("a?t"); + new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t"); fail("Wildcard queries should not be allowed"); } catch (ParseException expected) { // expected exception @@ -855,7 +850,7 @@ public void testCustomQueryParserFuzzy() throws Exception { try { - new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("xunit~"); + new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~"); fail("Fuzzy queries should not be allowed"); } catch (ParseException expected) { // expected exception @@ -865,7 +860,7 @@ public void testBooleanQuery() throws Exception { BooleanQuery.setMaxClauseCount(2); try { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.parse("one two three"); fail("ParseException expected due to too many boolean clauses"); } catch (ParseException expected) { @@ -877,7 +872,7 @@ * This test differs from TestPrecedenceQueryParser */ public void testPrecedence() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); Query query1 = qp.parse("A AND B OR C AND D"); Query query2 = qp.parse("+A +B +C +D"); assertEquals(query1, query2); @@ -885,7 +880,7 @@ public void testLocalDateFormat() throws IOException, ParseException { RAMDirectory ramDir = new RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new 
WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); iw.close(); @@ -901,7 +896,7 @@ public void testStarParsing() throws Exception { final int[] type = new int[1]; - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) { + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)) { @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { // override error checking of superclass @@ -994,7 +989,7 @@ } public void testMatchAllDocs() throws Exception { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); assertEquals(new MatchAllDocsQuery(), qp.parse("*:*")); assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)")); BooleanQuery bq = (BooleanQuery)qp.parse("+*:* -*:*"); @@ -1003,7 +998,7 @@ } private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException { - QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "date", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "date", new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.setLocale(Locale.ENGLISH); Query q = qp.parse(query); ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs; @@ -1031,7 +1026,7 @@ // "match" public void testPositionIncrements() throws Exception { Directory dir = new MockRAMDirectory(); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, a)); Document doc = new Document(); doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED)); Index: lucene/src/test/org/apache/lucene/collation/CollationTestBase.java =================================================================== --- lucene/src/test/org/apache/lucene/collation/CollationTestBase.java (revision 944954) +++ lucene/src/test/org/apache/lucene/collation/CollationTestBase.java (working copy) @@ -19,7 +19,8 @@ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -173,7 +174,7 @@ String usResult) throws Exception { RAMDirectory indexStore = new RAMDirectory(); IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); // document data: // the tracer field is used to determine which document was hit Index: lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 944954) +++ 
lucene/src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -17,7 +17,6 @@ * limitations under the License. */ -import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -25,6 +24,8 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -328,7 +329,7 @@ } private void initializeIndex(String[] values) throws IOException { - initializeIndex(values, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + initializeIndex(values, new MockAnalyzer(MockTokenizer.WHITESPACE, false)); } private void initializeIndex(String[] values, Analyzer analyzer) throws IOException { @@ -341,7 +342,7 @@ } private void addDoc(String content) throws IOException { - IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND)); insertDoc(writer, content); writer.close(); } Index: lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java (revision 944954) +++ lucene/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java (working copy) @@ -18,6 +18,8 @@ */ import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -116,7 +118,7 @@ query.setSlop(slop); RAMDirectory ramDir = new RAMDirectory(); - IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); writer.addDocument(doc); writer.close(); Index: lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 944954) +++ lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -67,7 +67,7 @@ "X 4 5 6" }; small = new RAMDirectory(); - IndexWriter writer = new IndexWriter(small, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter writer = new IndexWriter(small, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); for (int i = 0; i < data.length; i++) { Document doc = new Document(); Index: lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java =================================================================== --- lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (revision 944954) +++ 
lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (working copy) @@ -21,7 +21,9 @@ import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -57,7 +59,7 @@ // create test index mDirectory = new RAMDirectory(); final IndexWriter writer = new IndexWriter(mDirectory, - new IndexWriterConfig(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT))); + new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); addDocument(writer, "1", "I think it should work."); addDocument(writer, "2", "I think it should work."); addDocument(writer, "3", "I think it should work."); Index: lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java =================================================================== --- lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java (revision 944954) +++ lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java (working copy) @@ -19,7 +19,9 @@ import java.io.IOException; -import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -44,7 +46,7 @@ // create test index final IndexWriter writer = new IndexWriter(mDirectory, new IndexWriterConfig(TEST_VERSION_CURRENT, - new StandardAnalyzer(TEST_VERSION_CURRENT)).setOpenMode( + new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).setOpenMode( OpenMode.APPEND)); addDocument(writer, "A", "Should we, could we, would we?"); addDocument(writer, "B", "It should. 
Should it?"); Index: lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java (revision 944954) +++ lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java (working copy) @@ -23,8 +23,9 @@ import java.util.HashMap; import java.util.Map; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; @@ -118,8 +119,7 @@ private static final int NUM_DOCS = 10; private IndexWriterConfig getConfig() { - return new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer( - TEST_VERSION_CURRENT)); + return new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)); } private void populateDirs(Directory[] dirs, boolean multipleCommits) Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (revision 944954) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java (working copy) @@ -40,13 +40,9 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.StopAnalyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; @@ -1722,7 +1718,7 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - return new TokenFilter(new StandardTokenizer(TEST_VERSION_CURRENT, reader)) { + return new TokenFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)) { private int count = 0; @Override Index: lucene/src/test/org/apache/lucene/index/DocHelper.java =================================================================== --- lucene/src/test/org/apache/lucene/index/DocHelper.java (revision 944954) +++ lucene/src/test/org/apache/lucene/index/DocHelper.java (working copy) @@ -23,7 +23,8 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; @@ -219,7 +220,7 @@ */ public static SegmentInfo writeDoc(Directory dir, Document doc) throws IOException { - return writeDoc(dir, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), Similarity.getDefault(), doc); + return writeDoc(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), Similarity.getDefault(), doc); } /** Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java 
=================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (revision 944954) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java (working copy) @@ -19,7 +19,8 @@ import java.io.IOException; -import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.search.IndexSearcher; @@ -41,7 +42,7 @@ Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDeleteTerms(1)); + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); for (int i = 0; i < keywords.length; i++) { Document doc = new Document(); @@ -76,7 +77,7 @@ Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -110,7 +111,7 @@ public void testMaxBufferedDeletes() throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDeleteTerms(1)); + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(1)); writer.deleteDocuments(new Term("foobar", "1")); writer.deleteDocuments(new Term("foobar", "1")); writer.deleteDocuments(new Term("foobar", "1")); @@ -124,7 +125,7 @@ for(int t=0;t<2;t++) { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(4) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(4) .setMaxBufferedDeleteTerms(4)); int id = 0; @@ -165,7 +166,7 @@ public void testBothDeletes() throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(100) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(100) .setMaxBufferedDeleteTerms(100)); int id = 0; @@ -197,7 +198,7 @@ public void testBatchDeletes() throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -240,7 +241,7 @@ public void testDeleteAll() throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -286,7 +287,7 @@ public void testDeleteAllRollback() 
throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -323,7 +324,7 @@ public void testDeleteAllNRT() throws IOException { Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(2) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2) .setMaxBufferedDeleteTerms(2)); int id = 0; @@ -412,7 +413,7 @@ // First build up a starting index: MockRAMDirectory startDir = new MockRAMDirectory(); - IndexWriter writer = new IndexWriter(startDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter writer = new IndexWriter(startDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); for (int i = 0; i < 157; i++) { Document d = new Document(); d.add(new Field("id", Integer.toString(i), Field.Store.YES, @@ -435,7 +436,7 @@ MockRAMDirectory dir = new MockRAMDirectory(startDir); dir.setPreventDoubleWrite(false); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDocs(1000) + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(1000) .setMaxBufferedDeleteTerms(1000)); // For each disk size, first try to commit against @@ -639,7 +640,7 @@ MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setMaxBufferedDeleteTerms(2)); + TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setMaxBufferedDeleteTerms(2)); LogMergePolicy lmp = (LogMergePolicy) modifier.getConfig().getMergePolicy(); lmp.setUseCompoundFile(true); lmp.setUseCompoundDocStore(true); @@ -748,7 +749,7 @@ String[] text = { "Amsterdam", "Venice" }; MockRAMDirectory dir = new MockRAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter modifier = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); modifier.commit(); dir.failOn(failure.reset()); @@ -775,7 +776,7 @@ public void testDeleteNullQuery() throws IOException { Directory dir = new MockRAMDirectory(); - IndexWriter modifier = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); + IndexWriter modifier = new IndexWriter(dir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), IndexWriter.MaxFieldLength.UNLIMITED); for (int i = 0; i < 5; i++) { addDoc(modifier, i, 2*i); Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (revision 944954) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (working copy) @@ -30,20 +30,14 @@ import java.util.List; import java.util.Locale; import 
java.util.Map; -import java.util.Collections; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -56,7 +50,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.messages.MessageImpl; -import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.core.QueryNodeException; import org.apache.lucene.queryParser.core.messages.QueryParserMessages; import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode; @@ -81,6 +74,7 @@ import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.MockRAMDirectory; import org.apache.lucene.util.LocalizedTestCase; +import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; @@ -318,7 +312,7 @@ } public void testConstantScoreAutoRewrite() throws Exception { - StandardQueryParser qp = new StandardQueryParser(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + StandardQueryParser qp = new StandardQueryParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); Query q = qp.parse("foo*bar", "field"); assertTrue(q instanceof WildcardQuery); assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((MultiTermQuery) q).getRewriteMethod()); @@ -343,9 +337,9 @@ public void testSimple() throws Exception { assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2"); assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), + assertQueryEquals("türm term term", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "türm term term"); - assertQueryEquals("ümlaut", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), "ümlaut"); + assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut"); assertQueryEquals("\"\"", new KeywordAnalyzer(), ""); assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:"); @@ -402,7 +396,7 @@ } public void testPunct() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -423,7 +417,7 @@ assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -577,7 +571,7 @@ public void testFarsiRangeCollating() throws Exception { RAMDirectory ramDir = new
RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); Document doc = new Document(); doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED)); @@ -586,7 +580,7 @@ IndexSearcher is = new IndexSearcher(ramDir, true); StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in // RuleBasedCollator. However, the Arabic Locale seems to order the @@ -740,7 +734,7 @@ } public void testEscaped() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); /* * assertQueryEquals("\\[brackets", a, "\\[brackets"); @@ -839,7 +833,7 @@ } public void testQueryStringEscaping() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); @@ -908,7 +902,8 @@ } public void testBoost() throws Exception { - StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT, Collections.singleton("on")); + CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on")); + Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer(oneStopAnalyzer); @@ -924,7 +919,7 @@ assertNotNull(q); StandardQueryParser qp2 = new StandardQueryParser(); - qp2.setAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT)); + qp2.setAnalyzer(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); q = qp2.parse("the^3", "field"); // "the" is a stop word so the result is an empty query: @@ -954,7 +949,7 @@ public void testCustomQueryParserWildcard() { try { - new QPTestParser(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("a?t", "contents"); + new QPTestParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t", "contents"); fail("Wildcard queries should not be allowed"); } catch (QueryNodeException expected) { // expected exception @@ -963,7 +958,7 @@ public void testCustomQueryParserFuzzy() throws Exception { try { - new QPTestParser(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("xunit~", "contents"); + new QPTestParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~", "contents"); fail("Fuzzy queries should not be allowed"); } catch (QueryNodeException expected) { // expected exception @@ -974,7 +969,7 @@ BooleanQuery.setMaxClauseCount(2); try { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.parse("one two three", "field"); fail("ParseException expected due to too many boolean clauses"); @@ -988,7 +983,7 @@ */ public void testPrecedence() throws Exception { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); Query query1 = qp.parse("A AND 
B OR C AND D", "field"); Query query2 = qp.parse("+A +B +C +D", "field"); @@ -999,7 +994,7 @@ public void testLocalDateFormat() throws IOException, QueryNodeException { RAMDirectory ramDir = new RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))); + IndexWriter iw = new IndexWriter(ramDir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))); addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); iw.close(); @@ -1123,7 +1118,7 @@ public void testMatchAllDocs() throws Exception { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); assertEquals(new MatchAllDocsQuery(), qp.parse("*:*", "field")); assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)", "field")); @@ -1135,7 +1130,7 @@ private void assertHits(int expected, String query, IndexSearcher is) throws IOException, QueryNodeException { StandardQueryParser qp = new StandardQueryParser(); - qp.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.setLocale(Locale.ENGLISH); Query q = qp.parse(query, "date"); Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java (revision 944954) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardTokenizer; @@ -152,9 +153,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } @@ -222,9 +222,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestPosIncrementFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java (revision 944954) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java (working copy) @@ -20,10 +20,9 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import 
org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; @@ -146,9 +145,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } @@ -216,9 +214,8 @@ @Override public TokenStream tokenStream(String fieldName, Reader reader) { - TokenStream result = new StandardTokenizer(TEST_VERSION_CURRENT, reader); + TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true); result = new TestPosIncrementFilter(result); - result = new LowerCaseFilter(TEST_VERSION_CURRENT, result); return result; } } Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (revision 944954) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (working copy) @@ -28,20 +28,14 @@ import java.util.HashSet; import java.util.List; import java.util.Locale; -import java.util.Collections; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.WhitespaceAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.DateField; @@ -74,6 +68,7 @@ import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LocalizedTestCase; +import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.RegExp; @@ -334,9 +329,9 @@ public void testSimple() throws Exception { assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2"); assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), + assertQueryEquals("türm term term", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "türm term term"); - assertQueryEquals("ümlaut", new WhitespaceAnalyzer(TEST_VERSION_CURRENT), "ümlaut"); + assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut"); assertQueryEquals("\"\"", new KeywordAnalyzer(), ""); assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:"); @@ -391,7 +386,7 @@ "+(title:dog title:cat) -author:\"bob dole\""); QueryParserWrapper qp = new QueryParserWrapper("field", - new
StandardAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer()); // make sure OR is the default: assertEquals(QueryParserWrapper.OR_OPERATOR, qp.getDefaultOperator()); qp.setDefaultOperator(QueryParserWrapper.AND_OPERATOR); @@ -401,7 +396,7 @@ } public void testPunct() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertQueryEquals("a&b", a, "a&b"); assertQueryEquals("a&&b", a, "a&&b"); assertQueryEquals(".NET", a, ".NET"); @@ -422,7 +417,7 @@ assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); @@ -576,7 +571,7 @@ public void testFarsiRangeCollating() throws Exception { RAMDirectory ramDir = new RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), true, + IndexWriter iw = new IndexWriter(ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), true, IndexWriter.MaxFieldLength.LIMITED); Document doc = new Document(); doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, @@ -586,7 +581,7 @@ IndexSearcher is = new IndexSearcher(ramDir, true); QueryParserWrapper qp = new QueryParserWrapper("content", - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.WHITESPACE, false)); // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi @@ -732,7 +727,7 @@ } public void testEscaped() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); /* * assertQueryEquals("\\[brackets", a, "\\[brackets"); @@ -829,7 +824,7 @@ } public void testQueryStringEscaping() throws Exception { - Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT); + Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); @@ -898,7 +893,8 @@ } public void testBoost() throws Exception { - StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT, Collections.singleton("on")); + CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on")); + Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); QueryParserWrapper qp = new QueryParserWrapper("field", oneStopAnalyzer); Query q = qp.parse("on^1.0"); assertNotNull(q); @@ -912,7 +908,7 @@ assertNotNull(q); QueryParserWrapper qp2 = new QueryParserWrapper("field", - new StandardAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)); q = qp2.parse("the^3"); // "the" is a stop word so the result is an empty query: assertNotNull(q); @@ -940,7 +936,7 @@ public void testCustomQueryParserWildcard() { try { - new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("a?t"); + new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t"); fail("Wildcard queries should not be allowed"); } catch (ParseException expected) { // expected exception @@ -949,7 +945,7 @@ public void testCustomQueryParserFuzzy() 
throws Exception { try { - new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("xunit~"); + new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~"); fail("Fuzzy queries should not be allowed"); } catch (ParseException expected) { // expected exception @@ -960,7 +956,7 @@ BooleanQuery.setMaxClauseCount(2); try { QueryParserWrapper qp = new QueryParserWrapper("field", - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.parse("one two three"); fail("ParseException expected due to too many boolean clauses"); } catch (ParseException expected) { @@ -973,7 +969,7 @@ */ public void testPrecedence() throws Exception { QueryParserWrapper qp = new QueryParserWrapper("field", - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.WHITESPACE, false)); Query query1 = qp.parse("A AND B OR C AND D"); Query query2 = qp.parse("+A +B +C +D"); @@ -983,7 +979,7 @@ public void testLocalDateFormat() throws IOException, ParseException { RAMDirectory ramDir = new RAMDirectory(); - IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(TEST_VERSION_CURRENT), true, + IndexWriter iw = new IndexWriter(ramDir, new MockAnalyzer(MockTokenizer.WHITESPACE, false), true, IndexWriter.MaxFieldLength.LIMITED); addDateDoc("a", 2005, 12, 2, 10, 15, 33, iw); addDateDoc("b", 2005, 12, 4, 22, 15, 00, iw); @@ -1101,7 +1097,7 @@ public void testMatchAllDocs() throws Exception { QueryParserWrapper qp = new QueryParserWrapper("field", - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.WHITESPACE, false)); assertEquals(new MatchAllDocsQuery(), qp.parse("*:*")); assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)")); BooleanQuery bq = (BooleanQuery) qp.parse("+*:* -*:*"); @@ -1112,7 +1108,7 @@ private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException { QueryParserWrapper qp = new QueryParserWrapper("date", - new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); + new MockAnalyzer(MockTokenizer.WHITESPACE, false)); qp.setLocale(Locale.ENGLISH); Query q = qp.parse(query); ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs; Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java =================================================================== --- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java (revision 944954) +++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/analyzing/TestAnalyzingQueryParser.java (working copy) @@ -21,11 +21,9 @@ import java.io.Reader; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.standard.StandardFilter; -import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.util.LuceneTestCase; @@ -107,7 +105,6 @@ } -// TODO: Use a TestAnalyzer instead final class TestFoldingFilter extends TokenFilter { final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); @@ -122,7 +119,6 @@ for (int i = 0; i < term.length; i++) switch(term[i]) { case 'ü': - case 'Ü': term[i] = 'u'; break; 
         case 'ö':
@@ -148,10 +144,8 @@
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new StandardTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
-      result = new StandardFilter(result);
+      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       result = new TestFoldingFilter(result);
-      result = new LowerCaseFilter(LuceneTestCase.TEST_VERSION_CURRENT, result);
       return result;
     }
   }
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
===================================================================
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java	(revision 944954)
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java	(working copy)
@@ -19,11 +19,10 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.DateTools;
@@ -36,6 +35,8 @@
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -44,7 +45,6 @@
 import java.util.Calendar;
 import java.util.GregorianCalendar;
 import java.util.HashSet;
-import java.util.Collections;
 
 public class TestPrecedenceQueryParser extends LocalizedTestCase {
@@ -240,7 +240,7 @@
     assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
                       "+(title:dog title:cat) -author:\"bob dole\"");
 
-    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new StandardAnalyzer(TEST_VERSION_CURRENT));
+    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer());
     // make sure OR is the default:
     assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
     qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
@@ -254,7 +254,7 @@
   }
 
   public void testPunct() throws Exception {
-    Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
     assertQueryEquals("a&b", a, "a&b");
     assertQueryEquals("a&&b", a, "a&&b");
     assertQueryEquals(".NET", a, ".NET");
@@ -274,7 +274,7 @@
     assertQueryEquals("term 1.0 1 2", null, "term");
     assertQueryEquals("term term1 term2", null, "term term term");
 
-    Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true);
     assertQueryEquals("3", a, "3");
     assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
     assertQueryEquals("term term1 term2", a, "term term1 term2");
@@ -412,7 +412,7 @@
   }
 
   public void testEscaped() throws Exception {
-    Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
 
     /*assertQueryEquals("\\[brackets", a, "\\[brackets");
     assertQueryEquals("\\[brackets", null, "brackets");
@@ -517,7 +517,8 @@
   public void testBoost() throws Exception {
-    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT, Collections.singleton("on"));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
+    Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true);
     PrecedenceQueryParser qp = new PrecedenceQueryParser("field", oneStopAnalyzer);
     Query q = qp.parse("on^1.0");
     assertNotNull(q);
@@ -530,7 +531,7 @@
     q = qp.parse("\"on\"^1.0");
     assertNotNull(q);
 
-    q = getParser(new StandardAnalyzer(TEST_VERSION_CURRENT)).parse("the^3");
+    q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3");
     assertNotNull(q);
   }
@@ -544,7 +545,7 @@
   public void testCustomQueryParserWildcard() {
     try {
-      new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("a?t");
+      new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t");
     } catch (ParseException expected) {
       return;
     }
@@ -553,7 +554,7 @@
   public void testCustomQueryParserFuzzy() throws Exception {
     try {
-      new QPTestParser("contents", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("xunit~");
+      new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~");
     } catch (ParseException expected) {
       return;
     }
@@ -563,7 +564,7 @@
   public void testBooleanQuery() throws Exception {
     BooleanQuery.setMaxClauseCount(2);
     try {
-      getParser(new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("one two three");
+      getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three");
       fail("ParseException expected due to too many boolean clauses");
     } catch (ParseException expected) {
       // too many boolean clauses, so ParseException is expected
@@ -577,7 +578,7 @@
   // failing tests disabled since PrecedenceQueryParser
   // is currently unmaintained
   public void _testPrecedence() throws Exception {
-    PrecedenceQueryParser parser = getParser(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+    PrecedenceQueryParser parser = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
     Query query1 = parser.parse("A AND B OR C AND D");
     Query query2 = parser.parse("(A AND B) OR (C AND D)");
     assertEquals(query1, query2);
Index: lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
===================================================================
--- lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java	(revision 944954)
+++ lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java	(working copy)
@@ -23,7 +23,8 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.MockRAMDirectory;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
@@ -37,7 +38,7 @@
     super.setUp();
     dir= new MockRAMDirectory();
     writer = new IndexWriter(dir, new IndexWriterConfig(
-       TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
+       TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))
        .setMaxBufferedDocs(2));
     indexDocs(writer);
     reader = IndexReader.open(dir, true);
Index: lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
===================================================================
--- lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java	(revision 944954)
+++ lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java	(working copy)
@@ -7,7 +7,8 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
 import org.apache.lucene.index.IndexReader;
@@ -42,8 +43,8 @@
 	CoreParser builder;
 	static Directory dir;
-	// TODO: change to CURRENT and rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
-	Analyzer analyzer=new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_24);
+	// TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
+	Analyzer analyzer=new MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
 	IndexReader reader;
 	private IndexSearcher searcher;
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java	(revision 944954)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java	(working copy)
@@ -19,9 +19,10 @@
 
 import java.io.IOException;
 
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -59,7 +60,7 @@
     final String TEXT = "the fox jumped";
     final Directory directory = new RAMDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
       document.add(new Field(FIELD, new TokenStreamConcurrent(),
@@ -102,7 +103,7 @@
     final String TEXT = "the fox jumped";
     final Directory directory = new RAMDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
       document.add(new Field(FIELD, new TokenStreamConcurrent(),
@@ -171,7 +172,7 @@
     final String TEXT = "the fox did not jump";
     final Directory directory = new RAMDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
       document.add(new Field(FIELD, new TokenStreamSparse(),
@@ -213,7 +214,7 @@
     final String TEXT = "the fox did not jump";
     final Directory directory = new RAMDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
       document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED,
@@ -253,7 +254,7 @@
     final String TEXT = "the fox did not jump";
     final Directory directory = new RAMDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
       document.add(new Field(FIELD, new TokenStreamSparse(),
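For reviewers, the conversions in this patch follow one consistent mapping from the old analyzers onto MockAnalyzer. The sketch below is inferred only from the constructor shapes the hunks themselves use, not from MockAnalyzer's full API; the MockAnalyzerMapping class is a hypothetical illustration and not part of the patch:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;

// Hypothetical reviewer aid, not part of the patch: the equivalences
// this patch applies when swapping real analyzers for mocks.
public class MockAnalyzerMapping {

  // WhitespaceAnalyzer -> whitespace tokens, no lowercasing.
  static Analyzer whitespace() {
    return new MockAnalyzer(MockTokenizer.WHITESPACE, false);
  }

  // SimpleAnalyzer -> letter-run tokens, lowercased.
  static Analyzer simple() {
    return new MockAnalyzer(MockTokenizer.SIMPLE, true);
  }

  // StandardAnalyzer (approximately) -> letter-run tokens, lowercased,
  // English stopwords filtered, position increments enabled.
  static Analyzer standardLike() {
    return new MockAnalyzer(MockTokenizer.SIMPLE, true,
        MockTokenFilter.ENGLISH_STOPSET, true);
  }

  // KeywordAnalyzer -> the whole input as a single token.
  static Analyzer keyword() {
    return new MockAnalyzer(MockTokenizer.KEYWORD, false);
  }
}

Judging from the TestParser TODO above, the trailing boolean in the four-argument form toggles position increments for the stop filter.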
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	(revision 944954)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java	(working copy)
@@ -23,11 +23,9 @@
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.StringTokenizer;
 
 import javax.xml.parsers.DocumentBuilder;
@@ -35,12 +33,12 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -80,7 +78,9 @@
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@@ -97,7 +97,7 @@
   RAMDirectory ramDir;
   public IndexSearcher searcher = null;
   int numHighlights = 0;
-  final Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
+  final Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
   TopDocs hits;
 
   String[] texts = {
@@ -117,7 +117,7 @@
   }
 
   public void testQueryScorerHits() throws Exception {
-    Analyzer analyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
     query = qp.parse("\"very long\"");
     searcher = new IndexSearcher(ramDir, true);
@@ -148,7 +148,7 @@
     String s1 = "I call our world Flatland, not because we call it so,";
 
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
+    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
 
     // Verify that a query against the default field results in text being
     // highlighted
@@ -180,7 +180,7 @@
    */
   private static String highlightField(Query query, String fieldName, String text)
       throws IOException, InvalidTokenOffsetsException {
-    TokenStream tokenStream = new StandardAnalyzer(TEST_VERSION_CURRENT).tokenStream(fieldName, new StringReader(text));
+    TokenStream tokenStream = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, new StringReader(text));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
     QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
@@ -225,7 +225,7 @@
     String f2c = f2 + ":";
     String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
         + " OR " + f2c + ph2 + ")";
-    Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer analyzer = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, f1, analyzer);
     Query query = qp.parse(q);
@@ -1138,8 +1138,7 @@
       @Override
       public void run() throws Exception {
         String goodWord = "goodtoken";
-        Set stopWords = new HashSet(1);
-        stopWords.add("stoppedtoken");
+        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
 
         TermQuery query = new TermQuery(new Term("data", goodWord));
@@ -1149,16 +1148,16 @@
         for (int i = 0; i < 10000; i++) {
           sb.append(" ");
           // only one stopword
-          sb.append(stopWords.iterator().next());
+          sb.append("stoppedtoken");
         }
         SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
-        Highlighter hg = getHighlighter(query, "data", new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords).tokenStream(
+        Highlighter hg = getHighlighter(query, "data", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true).tokenStream(
             "data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
         // new
         // QueryTermScorer(query));
         hg.setTextFragmenter(new NullFragmenter());
         hg.setMaxDocCharsToAnalyze(100);
-        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
+        match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
         assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
             .getMaxDocCharsToAnalyze());
@@ -1169,7 +1168,7 @@
         // + whitespace)
         sb.append(" ");
         sb.append(goodWord);
-        match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "data", sb.toString());
+        match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
         assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
             .getMaxDocCharsToAnalyze());
       }
@@ -1183,18 +1182,15 @@
     TestHighlightRunner helper = new TestHighlightRunner() {
       @Override
       public void run() throws Exception {
-        Set stopWords = new HashSet();
-        stopWords.add("in");
-        stopWords.add("it");
+        CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
         TermQuery query = new TermQuery(new Term("text", "searchterm"));
 
         String text = "this is a text with searchterm in it";
         SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
-        Highlighter hg = getHighlighter(query, "text", new StandardAnalyzer(TEST_VERSION_CURRENT,
-            stopWords).tokenStream("text", new StringReader(text)), fm);
+        Highlighter hg = getHighlighter(query, "text", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true).tokenStream("text", new StringReader(text)), fm);
         hg.setTextFragmenter(new NullFragmenter());
         hg.setMaxDocCharsToAnalyze(36);
-        String match = hg.getBestFragment(new StandardAnalyzer(TEST_VERSION_CURRENT, stopWords), "text", text);
+        String match = hg.getBestFragment(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopWords, true), "text", text);
         assertTrue(
             "Matched text should contain remainder of text after highlighted query ",
             match.endsWith("in it"));
@@ -1211,7 +1207,7 @@
       numHighlights = 0;
       // test to show how rewritten query can still be used
       searcher = new IndexSearcher(ramDir, true);
-      Analyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
+      Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
 
       QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
       Query query = parser.parse("JF? or Kenned*");
@@ -1325,7 +1321,7 @@
     // setup index 1
     RAMDirectory ramDir1 = new RAMDirectory();
     IndexWriter writer1 = new IndexWriter(ramDir1, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
     Document d = new Document();
     Field f = new Field(FIELD_NAME, "multiOne", Field.Store.YES, Field.Index.ANALYZED);
     d.add(f);
@@ -1337,7 +1333,7 @@
     // setup index 2
     RAMDirectory ramDir2 = new RAMDirectory();
     IndexWriter writer2 = new IndexWriter(ramDir2, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
     d = new Document();
     f = new Field(FIELD_NAME, "multiTwo", Field.Store.YES, Field.Index.ANALYZED);
     d.add(f);
@@ -1350,7 +1346,7 @@
     searchers[0] = new IndexSearcher(ramDir1, true);
     searchers[1] = new IndexSearcher(ramDir2, true);
     MultiSearcher multiSearcher = new MultiSearcher(searchers);
-    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION_CURRENT));
+    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     query = parser.parse("multi*");
     if (VERBOSE) System.out.println("Searching for: " + query.toString(FIELD_NAME));
@@ -1530,64 +1526,64 @@
       Highlighter highlighter;
       String result;
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("foo");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("foo");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
      assertEquals("Hi-Speed10 <B>foo</B>", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("10");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("10");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
       assertEquals("Hi-Speed<B>10</B> foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hi");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
       assertEquals("<B>Hi</B>-Speed10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("speed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("speed");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
       assertEquals("Hi-<B>Speed</B>10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hispeed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hispeed");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
       assertEquals("<B>Hi-Speed</B>10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hi speed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi speed");
       highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2(), s, 3, "...");
       assertEquals("<B>Hi-Speed</B>10 foo", result);
 
       // ///////////////// same tests, just put the bigger overlapping token
       // first
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("foo");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("foo");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("Hi-Speed10 <B>foo</B>", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("10");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("10");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("Hi-Speed<B>10</B> foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hi");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("<B>Hi</B>-Speed10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("speed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("speed");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("Hi-<B>Speed</B>10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hispeed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hispeed");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("<B>Hi-Speed</B>10 foo", result);
 
-      query = new QueryParser(TEST_VERSION_CURRENT, "text", new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).parse("hi speed");
+      query = new QueryParser(TEST_VERSION_CURRENT, "text", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("hi speed");
       highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
       result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
       assertEquals("<B>Hi-Speed</B>10 foo", result);
@@ -1598,7 +1594,7 @@
   }
 
   private Directory dir = new RAMDirectory();
-  private Analyzer a = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+  private Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
 
   public void testWeightedTermsWithDeletes() throws IOException, ParseException, InvalidTokenOffsetsException {
     makeIndex();
@@ -1613,7 +1609,7 @@
   }
 
   private void makeIndex() throws IOException {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
     writer.addDocument( doc( "t_text1", "more random words for second field del" ) );
     writer.addDocument( doc( "t_text1", "random words for highlighting tests del" ) );
@@ -1623,7 +1619,7 @@
   }
 
   private void deleteDocument() throws IOException {
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND));
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)).setOpenMode(OpenMode.APPEND));
     writer.deleteDocuments( new Term( "t_text1", "del" ) );
     // To see negative idf, keep comment the following line
     //writer.optimize();
@@ -1726,7 +1722,7 @@
     super.setUp();
     ramDir = new RAMDirectory();
     IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
     for (int i = 0; i < texts.length; i++) {
       addDoc(writer, texts[i]);
     }
@@ -1798,7 +1794,7 @@
    */
   @Override
   public TokenStream tokenStream(String arg0, Reader arg1) {
-    LowerCaseTokenizer stream = new LowerCaseTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, arg1);
+    Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
     stream.addAttribute(TermAttribute.class);
     stream.addAttribute(PositionIncrementAttribute.class);
     stream.addAttribute(OffsetAttribute.class);
Index: lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
===================================================================
--- lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java	(revision 944954)
+++ lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java	(working copy)
@@ -22,10 +22,10 @@
 import java.util.Collection;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
@@ -86,9 +86,9 @@
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    analyzerW = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
+    analyzerW = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
     analyzerB = new BigramAnalyzer();
-    analyzerK = new KeywordAnalyzer();
+    analyzerK = new MockAnalyzer(MockTokenizer.KEYWORD, false);
     paW = new QueryParser(TEST_VERSION_CURRENT, F, analyzerW );
     paB = new QueryParser(TEST_VERSION_CURRENT, F, analyzerB );
     dir = new RAMDirectory();
Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java
===================================================================
--- lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java	(revision 944954)
+++ lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java	(working copy)
@@ -20,6 +20,8 @@
 import java.io.IOException;
 import java.util.Iterator;
 
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -47,7 +49,7 @@
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    IndexWriter writer = new IndexWriter(store, new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+    IndexWriter writer = new IndexWriter(store, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
 
     Document doc;
Index: lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java
===================================================================
--- lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java	(revision 944954)
+++ lucene/contrib/queries/src/test/org/apache/lucene/search/BooleanFilterTest.java	(working copy)
@@ -19,6 +19,8 @@
 
 import java.io.IOException;
 
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -38,7 +40,7 @@
     super.setUp();
     directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false)));
     //Add series of docs with filterable fields : acces rights, prices, dates and "in-stock" flags
     addDoc(writer, "admin guest", "010", "20040101","Y");
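
Finally, a sketch of the stop-set idiom used in the testBoost and HighlighterTest hunks above, where java.util.Set-based stopword lists become character automata. The StopSetExamples class is a hypothetical illustration, not part of the patch; it uses only constructors and factory methods the patch itself exercises:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;

// Hypothetical illustration, not part of the patch.
public class StopSetExamples {

  // A single stopword: an automaton accepting exactly "on".
  static final CharacterRunAutomaton ON_STOPSET =
      new CharacterRunAutomaton(BasicAutomata.makeString("on"));

  // Several stopwords: a regular expression accepting "in" or "it".
  static final CharacterRunAutomaton IN_IT_STOPSET =
      new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());

  // Lowercasing SIMPLE tokenizer with "on" filtered out and position
  // increments enabled, mirroring testBoost in TestPrecedenceQueryParser.
  static Analyzer oneStopAnalyzer() {
    return new MockAnalyzer(MockTokenizer.SIMPLE, true, ON_STOPSET, true);
  }
}

Because the stop set is just an automaton over token text, a test can express arbitrary stopword lists without depending on StopAnalyzer or StandardAnalyzer.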