Index: solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java =================================================================== --- solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java (revision 1224761) +++ solr/core/src/test/org/apache/solr/analysis/TestSynonymFilterFactory.java (working copy) @@ -20,14 +20,20 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.io.Reader; import java.io.StringReader; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; import org.apache.lucene.analysis.synonym.SynonymFilter; +import org.apache.lucene.analysis.synonym.SynonymMap; import org.apache.lucene.util.Version; import org.apache.solr.common.ResourceLoader; import org.apache.solr.core.SolrResourceLoader; @@ -65,6 +71,25 @@ new int[] { 1, 0, 0, 0 }); } + /** test multiword offsets with the old impl; the factory is initialised with luceneMatchVersion=LUCENE_33, presumably to select that impl (TODO confirm) + * @deprecated Remove this test in Lucene 5.0 */ + @Deprecated + public void testMultiwordOffsetsOld() throws Exception { + SynonymFilterFactory factory = new SynonymFilterFactory(); + Map args = new HashMap(); + args.put("luceneMatchVersion", Version.LUCENE_33.toString()); + args.put("synonyms", "synonyms.txt"); + factory.init(args); + factory.inform(new StringMockSolrResourceLoader("national hockey league, nhl")); + TokenStream ts = factory.create(new MockTokenizer(new StringReader("national hockey league"), MockTokenizer.WHITESPACE, false)); + // NOTE(review): every token, not just "nhl", is expected at offsets 0-22 (the full length of the input) here; presumably a quirk of the old impl that this test pins down -- confirm
+ assertTokenStreamContents(ts, + new String[] { "national", "nhl", "hockey", "league" }, + new int[] { 0, 0, 0, 0 }, /* start offsets */ + new int[] { 22, 22, 22, 22 }, /* end offsets */ + new int[] { 1, 0, 1, 1 }); + } + /** if the synonyms are completely empty, test that we still analyze correctly */ public void testEmptySynonyms() throws Exception { SynonymFilterFactory factory = new SynonymFilterFactory(); @@ -85,7 +110,7 @@ } public List getLines(String resource) throws IOException { - return null; + return Arrays.asList(text.split("\n")); /* one list element per "\n"-separated piece of text, instead of the previous null */ } public Object newInstance(String cname, String... subpackages) { Index: modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java =================================================================== --- modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (revision 1224761) +++ modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymMapFilter.java (working copy) @@ -663,4 +663,24 @@ new String[] { "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" }, new int[] { 1, 0, 1, 1, 1, 0, 1 }); } + + /** Analyzes "national hockey league" with the multiword synonym "nhl" emitted alongside the original tokens: the expected offsets are per-token for the originals (0-8, 9-15, 16-22) and 0-22 for "nhl"; the last array is presumably the position increments (TODO confirm against assertAnalyzesTo). */ public void testMultiwordOffsets() throws Exception { + b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add("national hockey league", "nhl", keepOrig); + final SynonymMap map = b.build(); + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false); + return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, true)); + } + }; + + assertAnalyzesTo(a, "national hockey league", + new String[] { "national", "nhl", "hockey", "league" }, + new int[] { 0, 0, 9, 16 }, /* start offsets */ + new int[] { 8, 22, 15, 22 }, /* end offsets */ + new int[] { 1, 0, 1, 1 }); + } }