Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (revision 805644) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (working copy) @@ -89,6 +89,17 @@ checkReuse(new GermanSubclassAnalyzer(), "Tischen", "Tischen"); } + /* + * Test that changes to the exclusion table are applied immediately + * when using reusable token streams. + */ + public void testExclusionTableReuse() throws Exception { + GermanAnalyzer a = new GermanAnalyzer(); + checkReuse(a, "tischen", "tisch"); + a.setStemExclusionTable(new String[] { "tischen" }); + checkReuse(a, "tischen", "tischen"); + } + private void check(final String input, final String expected) throws IOException { Analyzer a = new GermanAnalyzer(); TokenStream tokenStream = a.tokenStream("dummy", new StringReader(input)); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (revision 805644) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (working copy) @@ -139,6 +139,17 @@ a.setStemExclusionTable(new String[] { "quintessência" }); checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged. } + + /* + * Test that changes to the exclusion table are applied immediately + * when using reusable token streams. 
+ */ + public void testExclusionTableReuse() throws Exception { + BrazilianAnalyzer a = new BrazilianAnalyzer(); + checkReuse(a, "quintessência", "quintessente"); + a.setStemExclusionTable(new String[] { "quintessência" }); + checkReuse(a, "quintessência", "quintessência"); + } private void check(final String input, final String expected) throws IOException { Analyzer analyzer = new BrazilianAnalyzer(); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (working copy) @@ -221,4 +221,14 @@ "captif" }); } + /* + * Test that changes to the exclusion table are applied immediately + * when using reusable token streams. + */ + public void testExclusionTableReuse() throws Exception { + FrenchAnalyzer fa = new FrenchAnalyzer(); + assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" }); + fa.setStemExclusionTable(new String[] { "habitable" }); + assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" }); + } } Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (revision 805644) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. 
*/ +import java.io.File; import java.io.IOException; import java.io.Reader; import java.io.StringReader; @@ -35,6 +36,8 @@ * */ public class TestDutchStemmer extends TestCase { + File dataDir = new File(System.getProperty("dataDir", "./bin")); /* test resource root; falls back to "./bin" when the dataDir system property is unset */ + File customDictFile = new File(dataDir, "org/apache/lucene/analysis/nl/customStemDict.txt"); /* stem dictionary loaded by testStemDictionaryReuse */ public void testWithSnowballExamples() throws IOException { check("lichaamsziek", "lichaamsziek"); @@ -144,7 +147,28 @@ checkReuse(a, "lichamelijkheden", "lichamelijkheden"); } - + /* + * Test that changes to the exclusion table are applied immediately + * when using reusable token streams. + */ + public void testExclusionTableReuse() throws Exception { + DutchAnalyzer a = new DutchAnalyzer(); + checkReuse(a, "lichamelijk", "licham"); + a.setStemExclusionTable(new String[] { "lichamelijk" }); + checkReuse(a, "lichamelijk", "lichamelijk"); + } + + /* + * Test that changes to the dictionary stemming table are applied immediately + * when using reusable token streams (customStemDict.txt maps lichamelijk to somethingentirelydifferent). 
+ */ + public void testStemDictionaryReuse() throws Exception { + DutchAnalyzer a = new DutchAnalyzer(); + checkReuse(a, "lichamelijk", "licham"); + a.setStemDictionary(customDictFile); + checkReuse(a, "lichamelijk", "somethingentirelydifferent"); + } + private void check(final String input, final String expected) throws IOException { Analyzer analyzer = new DutchAnalyzer(); TokenStream stream = analyzer.tokenStream("dummy", new StringReader(input)); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/customStemDict.txt =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/customStemDict.txt (revision 0) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/customStemDict.txt (revision 0) @@ -0,0 +1,3 @@ +lichamelijk somethingentirelydifferent +lichamelijke licham +lichamelijkheden licham Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\nl\customStemDict.txt ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java (working copy) @@ -17,6 +17,10 @@ * limitations under the License. 
*/ +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; import java.io.StringReader; import junit.framework.TestCase; @@ -32,17 +36,59 @@ * */ public class TestCzechAnalyzer extends TestCase { - + File dataDir = new File(System.getProperty("dataDir", "./bin")); + File customStopFile = new File(dataDir, "org/apache/lucene/analysis/cz/customStopWordFile.txt"); + public void testStopWord() throws Exception { assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" }); } - + public void testReusableTokenStream() throws Exception { Analyzer analyzer = new CzechAnalyzer(); assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" }); assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" }); } + /* + * An input stream that always throws IOException for testing. + */ + private static class UnreliableInputStream extends InputStream { + public int read() throws IOException { + throw new IOException(); + } + } + + /* + * The loadStopWords method does not throw IOException on error; + * previously it set the stoptable to null (rather than empty), + * which caused a NPE when it was time to create the StopFilter. + */ + public void testInvalidStopWordFile() throws Exception { + CzechAnalyzer cz = new CzechAnalyzer(); + cz.loadStopWords(new UnreliableInputStream(), "UTF-8"); + assertAnalyzesTo(cz, "Pokud mluvime o volnem", + new String[] { "pokud", "mluvime", "o", "volnem" }); + } + + /* + * Test that changes to the stop table via loadStopWords are applied immediately + * when using reusable token streams. 
+ */ + public void testStopWordFileReuse() throws Exception { + CzechAnalyzer cz = new CzechAnalyzer(); + assertAnalyzesToReuse(cz, "Česká Republika", + new String[] { "česká", "republika" }); + + InputStream stopwords = new FileInputStream(customStopFile); + try { + cz.loadStopWords(stopwords, "UTF-8"); + } finally { + stopwords.close(); /* opened by this test, so closed by this test */ + } + + assertAnalyzesToReuse(cz, "Česká Republika", new String[] { "česká" }); + } + private void assertAnalyzesTo(Analyzer a, String input, String[] output) throws Exception { TokenStream ts = a.tokenStream("dummy", new StringReader(input)); TermAttribute text = (TermAttribute) ts.getAttribute(TermAttribute.class); Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/customStopWordFile.txt =================================================================== --- contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/customStopWordFile.txt (revision 0) +++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/cz/customStopWordFile.txt (revision 0) @@ -0,0 +1,3 @@ +examplestopword +anotherexamplestopword +republika Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\cz\customStopWordFile.txt ___________________________________________________________________ Added: svn:eol-style + native Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy) @@ -114,6 +114,7 @@ */ public void setStemExclusionTable(String[] exclusionlist) { exclusionSet = StopFilter.makeStopSet(exclusionlist); + setPreviousTokenStream(null); // force a new stemmer to be created } /** @@ -121,6 +122,7 @@ */ public void setStemExclusionTable(Map exclusionlist) { exclusionSet = new HashSet(exclusionlist.keySet()); + setPreviousTokenStream(null); 
// force a new stemmer to be created } /** @@ -128,6 +130,7 @@ */ public void setStemExclusionTable(File exclusionlist) throws IOException { exclusionSet = WordlistLoader.getWordSet(exclusionlist); + setPreviousTokenStream(null); // force a new stemmer to be created } /** Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (working copy) @@ -111,18 +111,21 @@ */ public void setStemExclusionTable( String[] exclusionlist ) { excltable = StopFilter.makeStopSet( exclusionlist ); + setPreviousTokenStream(null); // force a new stemmer to be created } /** * Builds an exclusionlist from a {@link Map}. */ public void setStemExclusionTable( Map exclusionlist ) { excltable = new HashSet(exclusionlist.keySet()); + setPreviousTokenStream(null); // force a new stemmer to be created } /** * Builds an exclusionlist from the words contained in the given file. 
*/ public void setStemExclusionTable( File exclusionlist ) throws IOException { excltable = WordlistLoader.getWordSet( exclusionlist ); + setPreviousTokenStream(null); // force a new stemmer to be created } /** Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy) @@ -111,6 +111,7 @@ */ public void setStemExclusionTable(String[] exclusionlist) { excltable = StopFilter.makeStopSet(exclusionlist); + setPreviousTokenStream(null); // force a new stemmer to be created } /** @@ -118,6 +119,7 @@ */ public void setStemExclusionTable(Map exclusionlist) { excltable = new HashSet(exclusionlist.keySet()); + setPreviousTokenStream(null); // force a new stemmer to be created } /** @@ -126,6 +128,7 @@ */ public void setStemExclusionTable(File exclusionlist) throws IOException { excltable = new HashSet(WordlistLoader.getWordSet(exclusionlist)); + setPreviousTokenStream(null); // force a new stemmer to be created } /** Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy) @@ -131,6 +131,7 @@ */ public void setStemExclusionTable(String[] exclusionlist) { excltable = StopFilter.makeStopSet(exclusionlist); + setPreviousTokenStream(null); // force a new stemmer to be created } /** @@ -138,6 +139,7 @@ */ public void setStemExclusionTable(HashSet exclusionlist) { excltable = exclusionlist; + setPreviousTokenStream(null); // force a new stemmer to be created } /** @@ -146,6 
+148,7 @@ public void setStemExclusionTable(File exclusionlist) { try { excltable = org.apache.lucene.analysis.WordlistLoader.getWordSet(exclusionlist); + setPreviousTokenStream(null); // force a new stemmer to be created } catch (IOException e) { // TODO: throw IOException throw new RuntimeException(e); @@ -160,6 +163,7 @@ public void setStemDictionary(File stemdictFile) { try { stemdict = org.apache.lucene.analysis.WordlistLoader.getStemDict(stemdictFile); + setPreviousTokenStream(null); // force a new stemmer to be created } catch (IOException e) { // TODO: throw IOException throw new RuntimeException(e); @@ -210,7 +214,7 @@ streams.source = new StandardTokenizer(reader); streams.result = new StandardFilter(streams.source); streams.result = new StopFilter(streams.result, stoptable); - streams.result = new DutchStemFilter(streams.result, excltable); + streams.result = new DutchStemFilter(streams.result, excltable, stemdict); /* also pass stemdict so the reused stream applies the stem dictionary */ setPreviousTokenStream(streams); } else { streams.source.reset(reader); Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java =================================================================== --- contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (revision 805644) +++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (working copy) @@ -100,6 +100,7 @@ * @param encoding Encoding used (win-1250, iso-8859-2, ...), null for default system encoding */ public void loadStopWords( InputStream wordfile, String encoding ) { + setPreviousTokenStream(null); // force a new stopfilter to be created if ( wordfile == null ) { stoptable = new HashSet(); return; @@ -121,7 +122,9 @@ } } catch ( IOException e ) { - stoptable = null; + // clear any previous table (if present); use an empty set rather than null, which caused a NPE when the StopFilter was created + // TODO: throw IOException + stoptable = new HashSet(); } }