Index: lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
===================================================================
--- lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java	(working copy)
@@ -32,6 +32,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.StopFilter;
@@ -65,6 +66,8 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Tests QueryParser.
@@ -957,7 +960,8 @@
   }
 
   public void testStopwords() throws Exception {
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo")));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -973,7 +977,7 @@
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "a", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0         2                      5       7  8
Index: lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java	(working copy)
@@ -53,7 +53,7 @@
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       }
 
       @Override
@@ -207,7 +207,7 @@
 
   public void testPhraseQueryWithStopAnalyzer() throws Exception {
     RAMDirectory directory = new RAMDirectory();
-    StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
+    Analyzer stopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
         Version.LUCENE_24, stopAnalyzer));
     Document doc = new Document();
@@ -360,7 +360,7 @@
   }
 
   public void testToString() throws Exception {
-    StopAnalyzer analyzer = new StopAnalyzer(TEST_VERSION_CURRENT);
+    Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", analyzer);
     qp.setEnablePositionIncrements(true);
     PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
Index: lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java	(working copy)
@@ -54,6 +54,9 @@
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Term position unit test.
@@ -196,7 +199,7 @@
 
     // should not find "1 2" because there is a gap of 1 in the index
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                                     new StopWhitespaceAnalyzer(false));
+                                     new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, false));
     q = (PhraseQuery) qp.parse("\"1 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(0, hits.length);
@@ -220,26 +223,16 @@
 
     // when both qp qnd stopFilter propagate increments, we should find the doc.
     qp = new QueryParser(TEST_VERSION_CURRENT, "field",
-                         new StopWhitespaceAnalyzer(true));
+                         new MockAnalyzer(MockTokenizer.WHITESPACE, false, stopStopList, true));
     qp.setEnablePositionIncrements(true);
     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
   }
 
-  private static class StopWhitespaceAnalyzer extends Analyzer {
-    boolean enablePositionIncrements;
-    final MockAnalyzer a = new MockAnalyzer();
-    public StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
-      this.enablePositionIncrements = enablePositionIncrements;
-    }
-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = a.tokenStream(fieldName,reader);
-      return new StopFilter(enablePositionIncrements?TEST_VERSION_CURRENT:Version.LUCENE_24, ts,
-          new CharArraySet(TEST_VERSION_CURRENT, Collections.singleton("stop"), true));
-    }
-  }
+  // stoplist that accepts case-insensitive "stop"
+  private static final CharacterRunAutomaton stopStopList =
+    new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
 
   public void testPayloadsPos0() throws Exception {
     Directory dir = new MockRAMDirectory();
Index: lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/index/TestIndexWriter.java	(working copy)
@@ -38,6 +38,8 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -1868,7 +1870,7 @@
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
 
@@ -1951,7 +1953,7 @@
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
 
@@ -3098,7 +3100,7 @@
     Analyzer analyzer = new Analyzer() {
      @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new CrashingFilter(fieldName, new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader));
+        return new CrashingFilter(fieldName, new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
       }
     };
 
@@ -4185,7 +4187,7 @@
   public void testEndOffsetPositionStopFilter() throws Exception {
     MockRAMDirectory dir = new MockRAMDirectory();
     IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StopAnalyzer(TEST_VERSION_CURRENT)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)));
     Document doc = new Document();
     Field f = new Field("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     doc.add(f);
@@ -4486,23 +4488,23 @@
     Document doc = new Document();
     Field f = new Field("binary", b, 10, 17);
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field1")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc1field1"), MockTokenizer.WHITESPACE, false));
     Field f2 = new Field("string", "value", Field.Store.YES,Field.Index.ANALYZED);
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc1field2")));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc1field2"), MockTokenizer.WHITESPACE, false));
     doc.add(f);
     doc.add(f2);
     w.addDocument(doc);
 
     // add 2 docs to test in-memory merging
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field1")));
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc2field2")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false));
     w.addDocument(doc);
 
     // force segment flush so we can force a segment merge with doc3 later.
     w.commit();
 
-    f.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field1")));
-    f2.setTokenStream(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("doc3field2")));
+    f.setTokenStream(new MockTokenizer(new StringReader("doc3field1"), MockTokenizer.WHITESPACE, false));
+    f2.setTokenStream(new MockTokenizer(new StringReader("doc3field2"), MockTokenizer.WHITESPACE, false));
 
     w.addDocument(doc);
     w.commit();
Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java	(working copy)
@@ -22,6 +22,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -108,7 +109,7 @@
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+        return new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
       }
 
       @Override
@@ -141,7 +142,7 @@
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new TokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader)) {
+        return new TokenFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false)) {
           boolean first=true;
           AttributeSource.State state;
Index: lucene/src/test/org/apache/lucene/index/TestPayloads.java
===================================================================
--- lucene/src/test/org/apache/lucene/index/TestPayloads.java	(revision 944915)
+++ lucene/src/test/org/apache/lucene/index/TestPayloads.java	(working copy)
@@ -28,6 +28,7 @@
 import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -407,7 +408,7 @@
         @Override
         public TokenStream tokenStream(String fieldName, Reader reader) {
             PayloadData payload = fieldToData.get(fieldName);
-            TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
+            TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
             if (payload != null) {
                 if (payload.numFieldInstancesToSkip == 0) {
                     ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
Index: lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java
===================================================================
--- lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java	(revision 944915)
+++ lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestSynonymTokenFilter.java	(working copy)
@@ -24,6 +24,7 @@
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
@@ -93,8 +94,7 @@
 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-      ts = new LowerCaseFilter(TEST_VERSION_CURRENT, ts);
+      TokenStream ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
      ts = new SynonymTokenFilter(ts, synonyms, maxSynonyms);
       return ts;
     }
@@ -110,9 +110,8 @@
       SavedStreams streams = (SavedStreams) getPreviousTokenStream();
       if (streams == null) {
         streams = new SavedStreams();
-        streams.source = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
-        streams.result = new LowerCaseFilter(TEST_VERSION_CURRENT, streams.source);
-        streams.result = new SynonymTokenFilter(streams.result, synonyms, maxSynonyms);
+        streams.source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
+        streams.result = new SynonymTokenFilter(streams.source, synonyms, maxSynonyms);
         setPreviousTokenStream(streams);
       } else {
         streams.source.reset(reader);
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
===================================================================
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java	(revision 944915)
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java	(working copy)
@@ -35,6 +35,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
@@ -55,6 +56,7 @@
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.messages.MessageImpl;
+import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.core.QueryNodeException;
 import org.apache.lucene.queryParser.core.messages.QueryParserMessages;
 import org.apache.lucene.queryParser.core.nodes.FuzzyQueryNode;
@@ -79,6 +81,8 @@
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -1074,8 +1078,8 @@
 
   public void testStopwords() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
-    qp.setAnalyzer(
-        new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo" )));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    qp.setAnalyzer(new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));
 
     Query result = qp.parse("a:the OR a:foo", "a");
     assertNotNull("result is null and it shouldn't be", result);
@@ -1098,7 +1102,7 @@
   public void testPositionIncrement() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this" )));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);
Index: lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
===================================================================
--- lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java	(revision 944915)
+++ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java	(working copy)
@@ -33,6 +33,7 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
@@ -73,6 +74,8 @@
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -1059,7 +1062,8 @@
   }
 
   public void testStopwords() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "foo")));
+    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
+    QueryParserWrapper qp = new QueryParserWrapper("a", new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -1078,7 +1082,7 @@
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(TEST_VERSION_CURRENT, StopFilter.makeStopSet(TEST_VERSION_CURRENT, "the", "in", "are", "this")));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0         2                      5       7  8
Index: lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
===================================================================
--- lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java	(revision 944915)
+++ lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java	(working copy)
@@ -28,6 +28,9 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -143,9 +146,9 @@
    */
   private Analyzer randomAnalyzer() {
     switch(random.nextInt(3)) {
-      case 0: return new SimpleAnalyzer(TEST_VERSION_CURRENT);
-      case 1: return new StopAnalyzer(TEST_VERSION_CURRENT);
-      default: return new StandardAnalyzer(TEST_VERSION_CURRENT);
+      case 0: return new MockAnalyzer(MockTokenizer.SIMPLE, true);
+      case 1: return new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+      default: return new MockAnalyzer(MockTokenizer.WHITESPACE, false);
     }
   }
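
Note on the idiom (appended for readers of the patch, not part of it): every hunk above replaces a concrete analysis component (StopAnalyzer, WhitespaceTokenizer, StopFilter) with its deterministic test-only counterpart (MockAnalyzer, MockTokenizer, MockTokenFilter), expressing stopsets as automata instead of CharArraySets, so the tests stop depending on Version-sensitive behavior of the real analyzers. The sketch below shows the idiom in isolation. It reuses only the constructor shapes that appear in the diff, plus the TokenStream/TermAttribute consumption API of this era; the class name MockAnalyzerDemo, the field name "f", and the sample text are illustrative, and the exact Mock* signatures may differ in other revisions.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;

public class MockAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    // Stopset as an automaton: any token whose text matches "the|foo" is removed.
    CharacterRunAutomaton stopSet =
        new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());

    // SIMPLE tokenization, lowercasing enabled, stopset applied, position
    // increments preserved across removed stopwords -- the same argument
    // order the patch uses in its testStopwords() replacements.
    Analyzer analyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true);

    // Consume the stream: "The Foo bar" lowercases to [the, foo, bar], the
    // stopset drops "the" and "foo", so only "bar" is printed.
    TokenStream ts = analyzer.tokenStream("f", new StringReader("The Foo bar"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
    ts.end();
    ts.close();
  }
}

A side benefit of the swap: MockTokenizer also checks the TokenStream contract with assertions (reset before incrementToken, end and close afterwards), which is why the sketch drives reset(), end(), and close() explicitly, and why tests converted to the Mock* classes implicitly verify their consumers follow that workflow.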