Index: solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java (working copy) @@ -62,24 +62,4 @@ // This fails because ["e","e"] is the value of the token stream assertTokenStreamContents(ts, new String[] { "a", "e" }); } - - private class StringMockSolrResourceLoader implements ResourceLoader { - String text; - - StringMockSolrResourceLoader(String text) { - this.text = text; - } - - public List getLines(String resource) throws IOException { - return null; - } - - public T newInstance(String cname, Class expectedType, String... subpackages) { - return null; - } - - public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes("UTF-8")); - } - } } Index: solr/core/src/test/org/apache/solr/analysis/TestSynonymMap.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestSynonymMap.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/analysis/TestSynonymMap.java (working copy) @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -272,27 +273,7 @@ args.put( "synonyms", "something.txt" ); SlowSynonymFilterFactory ff = new SlowSynonymFilterFactory(); ff.init(args); - ff.inform( new ResourceLoader() { - @Override - public List getLines(String resource) throws IOException { - if( !"something.txt".equals(resource) ) { - throw new RuntimeException( "should not get a differnt resource" ); - } - List rules = new ArrayList(); - rules.add( "a,b" ); - return rules; - } - - @Override - public T newInstance(String cname, Class expectedType, String... subpackages) { - throw new RuntimeException("stub"); - } - - @Override - public InputStream openResource(String resource) throws IOException { - throw new RuntimeException("stub"); - } - }); + ff.inform(new StringMockSolrResourceLoader("a,b")); SlowSynonymMap synMap = ff.getSynonymMap(); assertEquals( 2, synMap.submap.size() ); Index: solr/core/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java (working copy) @@ -53,32 +53,12 @@ factory.setLuceneMatchVersion(TEST_VERSION_CURRENT); factory.init(args); - factory.inform(new LinesMockSolrResourceLoader(new ArrayList())); + factory.inform(new StringMockSolrResourceLoader("")); Tokenizer tokenizer = new MockTokenizer( new StringReader(StrUtils.join(Arrays.asList(test), ' ')), MockTokenizer.WHITESPACE, false); TokenStream stream = factory.create(tokenizer); assertTokenStreamContents(stream, gold); } - - class LinesMockSolrResourceLoader implements ResourceLoader { - List lines; - - LinesMockSolrResourceLoader(List lines) { - this.lines = lines; - } - - public List getLines(String resource) throws IOException { - return lines; - } - - public T newInstance(String cname, Class expectedType, String... subpackages) { - return null; - } - - public InputStream openResource(String resource) throws IOException { - return null; - } - } /** * Test the protected words mechanism of SnowballPorterFilterFactory Index: solr/core/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java (working copy) @@ -156,26 +156,6 @@ assertCollatesToSame(tsUmlaut, tsOE); } - private class StringMockSolrResourceLoader implements ResourceLoader { - String text; - - StringMockSolrResourceLoader(String text) { - this.text = text; - } - - public List getLines(String resource) throws IOException { - return null; - } - - public T newInstance(String cname, Class expectedType, String... subpackages) { - return null; - } - - public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes("UTF-8")); - } - } - private void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException { stream1.reset(); Index: solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java (working copy) @@ -97,24 +97,4 @@ TokenStream ts = factory.create(new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false)); assertTokenStreamContents(ts, new String[] { "GB" }); } - - private class StringMockSolrResourceLoader implements ResourceLoader { - String text; - - StringMockSolrResourceLoader(String text) { - this.text = text; - } - - public List getLines(String resource) throws IOException { - return Arrays.asList(text.split("\n")); - } - - public T newInstance(String cname, Class expectedType, String... subpackages) { - return null; - } - - public InputStream openResource(String resource) throws IOException { - return new ByteArrayInputStream(text.getBytes("UTF-8")); - } - } } Index: solr/core/src/test/org/apache/solr/core/ResourceLoaderTest.java =================================================================== --- solr/core/src/test/org/apache/solr/core/ResourceLoaderTest.java (revision 1338520) +++ solr/core/src/test/org/apache/solr/core/ResourceLoaderTest.java (working copy) @@ -19,6 +19,7 @@ import junit.framework.Assert; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.analysis.KeywordTokenizerFactory; import org.apache.solr.analysis.NGramFilterFactory; @@ -30,6 +31,7 @@ import org.apache.solr.util.plugin.SolrCoreAware; import java.io.File; +import java.io.IOException; import java.io.InputStream; import java.nio.charset.CharacterCodingException; import java.util.Arrays; @@ -114,7 +116,7 @@ } // now make sure getLines skips the BOM... - List lines = loader.getLines(fileWithBom); + List lines = loader.getLines(fileWithBom, IOUtils.CHARSET_UTF_8); assertEquals(1, lines.size()); assertEquals("BOMsAreEvil", lines.get(0)); } @@ -124,10 +126,10 @@ SolrResourceLoader loader = new SolrResourceLoader(null); // ensure we get our exception try { - List lines = loader.getLines(wrongEncoding); + loader.getLines(wrongEncoding, IOUtils.CHARSET_UTF_8); fail(); - } catch (SolrException expected) { - assertTrue(expected.getCause() instanceof CharacterCodingException); + } catch (IOException expected) { + assertTrue(CharacterCodingException.class.isInstance(expected)); } } } Index: solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/analysis/TypeTokenFilterFactory.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.core.TypeTokenFilter; import org.apache.lucene.analysis.util.InitializationException; import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.StrUtils; import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.TokenFilterFactory; @@ -54,7 +55,7 @@ if (files.size() > 0) { stopTypes = new HashSet(); for (String file : files) { - List typesLines = loader.getLines(file.trim()); + List typesLines = loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8); stopTypes.addAll(typesLines); } } Index: solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java (working copy) @@ -1,4 +1,6 @@ -/** +package org.apache.solr.analysis; + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -15,8 +17,6 @@ * limitations under the License. */ -package org.apache.solr.analysis; - import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter; import org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.StrUtils; /** @@ -58,13 +59,13 @@ try{ File mappingFile = new File( mapping ); if( mappingFile.exists() ){ - wlist = loader.getLines( mapping ); + wlist = loader.getLines(mapping, IOUtils.CHARSET_UTF_8); } else{ List files = StrUtils.splitFileNames( mapping ); wlist = new ArrayList(); for( String file : files ){ - List lines = loader.getLines( file.trim() ); + List lines = loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8); wlist.addAll( lines ); } } Index: solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/analysis/WordDelimiterFilterFactory.java (working copy) @@ -1,4 +1,6 @@ -/** +package org.apache.solr.analysis; + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -15,13 +17,12 @@ * limitations under the License. */ -package org.apache.solr.analysis; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator; import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.StrUtils; import java.util.ArrayList; @@ -70,7 +71,7 @@ List files = StrUtils.splitFileNames( types ); List wlist = new ArrayList(); for( String file : files ){ - List lines = loader.getLines( file.trim() ); + List lines = loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8); wlist.addAll( lines ); } typeTable = parseTypes(wlist); Index: solr/core/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/analysis/StemmerOverrideFilterFactory.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.StrUtils; /** @@ -51,7 +52,7 @@ dictionary = new CharArrayMap(luceneMatchVersion, files.size() * 10, ignoreCase); for (String file : files) { - List list = loader.getLines(file.trim()); + List list = loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8); for (String line : list) { String[] mapping = line.split("\t", 2); dictionary.put(mapping[0], mapping[1]); Index: solr/core/src/java/org/apache/solr/analysis/SlowSynonymFilterFactory.java =================================================================== --- solr/core/src/java/org/apache/solr/analysis/SlowSynonymFilterFactory.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/analysis/SlowSynonymFilterFactory.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.*; +import org.apache.lucene.util.IOUtils; import org.apache.solr.common.util.StrUtils; import java.io.File; @@ -71,12 +72,12 @@ try { File synonymFile = new File(synonyms); if (synonymFile.exists()) { - wlist = loader.getLines(synonyms); + wlist = loader.getLines(synonyms, IOUtils.CHARSET_UTF_8); } else { List files = StrUtils.splitFileNames(synonyms); wlist = new ArrayList(); for (String file : files) { - List lines = loader.getLines(file.trim()); + List lines = loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8); wlist.addAll(lines); } } Index: solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java =================================================================== --- solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java (working copy) @@ -63,7 +63,7 @@ /** * @since solr 1.3 */ -public class SolrResourceLoader implements ResourceLoader +public class SolrResourceLoader extends ResourceLoader { public static final Logger log = LoggerFactory.getLogger(SolrResourceLoader.class); @@ -296,73 +296,6 @@ return is; } - /** - * Accesses a resource by name and returns the (non comment) lines - * containing data. - * - *

- * A comment line is any line that starts with the character "#" - *

- * - * @param resource - * @return a list of non-blank non-comment lines with whitespace trimmed - * from front and back. - * @throws IOException - */ - public List getLines(String resource) throws IOException { - return getLines(resource, UTF_8); - } - - /** - * Accesses a resource by name and returns the (non comment) lines containing - * data using the given character encoding. - * - *

- * A comment line is any line that starts with the character "#" - *

- * - * @param resource the file to be read - * @param encoding - * @return a list of non-blank non-comment lines with whitespace trimmed - * @throws IOException - */ - public List getLines(String resource, - String encoding) throws IOException { - return getLines(resource, Charset.forName(encoding)); - } - - - public List getLines(String resource, Charset charset) throws IOException{ - BufferedReader input = null; - ArrayList lines; - try { - input = new BufferedReader(new InputStreamReader(openResource(resource), - charset.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT))); - - lines = new ArrayList(); - for (String word=null; (word=input.readLine())!=null;) { - // skip initial bom marker - if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') - word = word.substring(1); - // skip comments - if (word.startsWith("#")) continue; - word=word.trim(); - // skip blank lines - if (word.length()==0) continue; - lines.add(word); - } - } catch (CharacterCodingException ex) { - throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error loading resource (wrong encoding?): " + resource, ex); - } finally { - if (input != null) - input.close(); - } - return lines; - } - /* * A static map of short class name to fully qualified class name */ Index: solr/core/src/java/org/apache/solr/common/ResourceLoader.java =================================================================== --- solr/core/src/java/org/apache/solr/common/ResourceLoader.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/common/ResourceLoader.java (working copy) @@ -22,5 +22,5 @@ * be removed in (5.0). Use {@link org.apache.lucene.analysis.util.ResourceLoader} */ @Deprecated -public interface ResourceLoader extends org.apache.lucene.analysis.util.ResourceLoader { +public abstract class ResourceLoader extends org.apache.lucene.analysis.util.ResourceLoader { } Index: solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java =================================================================== --- solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (revision 1338520) +++ solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (working copy) @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.Charset; import java.util.List; import org.apache.lucene.index.*; @@ -101,7 +102,7 @@ // .setCodec(core.getCodec()) ); - List lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding); + List lines = core.getResourceLoader().getLines(sourceLocation, Charset.forName(characterEncoding)); for (String s : lines) { Document d = new Document(); Index: solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java =================================================================== --- solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java (revision 1338520) +++ solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java (working copy) @@ -20,26 +20,24 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.util.Arrays; import java.util.List; import org.apache.lucene.analysis.util.ResourceLoader; -class StringMockSolrResourceLoader implements ResourceLoader { +class StringMockSolrResourceLoader extends ResourceLoader { String text; StringMockSolrResourceLoader(String text) { this.text = text; } - public List getLines(String resource) throws IOException { + @Override + public List getLines(String resource, Charset charset) throws IOException { return Arrays.asList(text.split("\n")); } - public T newInstance(String cname, Class expectedType, String... subpackages) { - return null; - } - public InputStream openResource(String resource) throws IOException { return new ByteArrayInputStream(text.getBytes("UTF-8")); } Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java =================================================================== --- lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (revision 1338520) +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (working copy) @@ -17,16 +17,10 @@ * limitations under the License. */ -import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CodingErrorAction; +import java.io.*; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -107,17 +101,16 @@ assureMatchVersion(); List files = splitFileNames(wordFiles); CharArraySet words = null; - if (files.size() > 0) { - // default stopwords list has 35 or so words, but maybe don't make it that - // big to start - words = new CharArraySet(luceneMatchVersion, - files.size() * 10, ignoreCase); - for (String file : files) { - List wlist = loader.getLines(file.trim()); - words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, - ignoreCase)); - } + + if (files.isEmpty()) { + return words; } + + // default stopwords list has 35 or so words, but maybe don't make it that big to start + words = new CharArraySet(luceneMatchVersion, files.size() * 10, ignoreCase); + for (String file : files) { + words.addAll(loader.getLines(file.trim(), IOUtils.CHARSET_UTF_8)); + } return words; } @@ -128,24 +121,22 @@ assureMatchVersion(); List files = splitFileNames(wordFiles); CharArraySet words = null; - if (files.size() > 0) { - // default stopwords list has 35 or so words, but maybe don't make it that - // big to start - words = new CharArraySet(luceneMatchVersion, - files.size() * 10, ignoreCase); - for (String file : files) { - InputStream stream = null; - Reader reader = null; - try { - stream = loader.openResource(file.trim()); - CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT); - reader = new InputStreamReader(stream, decoder); - WordlistLoader.getSnowballWordSet(reader, words); - } finally { - IOUtils.closeWhileHandlingException(reader, stream); - } + + if (files.isEmpty()) { + return words; + } + + // default stopwords list has 35 or so words, but maybe don't make it that big to start + words = new CharArraySet(luceneMatchVersion, files.size() * 10, ignoreCase); + for (String file : files) { + InputStream stream = null; + Reader reader = null; + try { + stream = loader.openResource(file.trim()); + reader = IOUtils.getDecodingReader(stream, IOUtils.CHARSET_UTF_8); + WordlistLoader.getSnowballWordSet(reader, words); + } finally { + IOUtils.closeWhileHandlingException(reader, stream); } } return words; @@ -159,8 +150,9 @@ * @return a list of file names with the escaping backslashed removed */ protected List splitFileNames(String fileNames) { - if (fileNames == null) + if (fileNames == null) { return Collections.emptyList(); + } List result = new ArrayList(); for (String file : fileNames.split("(? * - * @param resource + * @param resource Name of the source whose lines should be retreived + * @param charset Charset for decoding the contents of the resource * @return a list of non-blank non-comment lines with whitespace trimmed * from front and back. - * @throws IOException + * @throws IOException Can be thrown while opening, reading or closing the resource */ - public List getLines(String resource) throws IOException; + public List getLines(String resource, Charset charset) throws IOException { + List contents = new ArrayList(); + + InputStream inputStream = null; + BufferedReader reader = null; + try { + inputStream = openResource(resource); + reader = (BufferedReader) IOUtils.getDecodingReader(inputStream, charset); + String line; + while ((line = reader.readLine()) != null) { + if (contents.isEmpty() && line.length() > 0 && line.charAt(0) == '\uFEFF') { + line = line.substring(1); + } + + line = line.trim(); + + if (!line.startsWith("#") && !line.isEmpty()) { + contents.add(line); + } + } + } finally { + IOUtils.close(reader, inputStream); + } + + return contents; + } - public T newInstance(String cname, Class expectedType, String ... subpackages); + public T newInstance(String cname, Class expectedType, String ... subpackages) { + try { + return Class.forName(cname).asSubclass(expectedType).newInstance(); + } catch (Exception e) { + throw new RuntimeException("Exception thrown while creating instance of [" + cname + "]", e); + } + } } \ No newline at end of file