Index: solr/common-build.xml =================================================================== --- solr/common-build.xml (revision 1126054) +++ solr/common-build.xml (working copy) @@ -188,12 +188,12 @@ + - @@ -204,12 +204,12 @@ + - @@ -226,6 +226,9 @@ + + + @@ -241,9 +244,6 @@ - - - @@ -252,12 +252,12 @@ + - Index: solr/src/test/org/apache/solr/spelling/suggest/LookupBenchmarkTest.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/spelling/suggest/TermFreq.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/spelling/suggest/Average.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/spelling/suggest/fst/FSTLookupTest.java (deleted) =================================================================== Index: solr/src/test/org/apache/solr/spelling/suggest/TermFreqArrayIterator.java (deleted) =================================================================== Index: solr/src/test-files/Top50KWiki.utf8 (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java =================================================================== --- solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java (revision 1126054) +++ solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java (working copy) @@ -1,122 +1,29 @@ package org.apache.solr.spelling.suggest; -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.List; +/** 
+ * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ -import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.search.suggest.Lookup; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.util.TermFreqIterator; -public abstract class Lookup { - /** - * Result of a lookup. - */ - public static final class LookupResult implements Comparable { - public final String key; - public final float value; - - public LookupResult(String key, float value) { - this.key = key; - this.value = value; - } - - @Override - public String toString() { - return key + "/" + value; - } - - /** Compare alphabetically. 
*/ - public int compareTo(LookupResult o) { - return this.key.compareTo(o.key); - } - } - - public static final class LookupPriorityQueue extends PriorityQueue { - - public LookupPriorityQueue(int size) { - super(size); - } - - @Override - protected boolean lessThan(LookupResult a, LookupResult b) { - return a.value < b.value; - } - - public LookupResult[] getResults() { - int size = size(); - LookupResult[] res = new LookupResult[size]; - for (int i = size - 1; i >= 0; i--) { - res[i] = pop(); - } - return res; - } - } - - /** Initialize the lookup. */ - public abstract void init(NamedList config, SolrCore core); - - /** Build lookup from a dictionary. Some implementations may require sorted - * or unsorted keys from the dictionary's iterator - use - * {@link SortedTermFreqIteratorWrapper} or - * {@link UnsortedTermFreqIteratorWrapper} in such case. - */ - public void build(Dictionary dict) throws IOException { - Iterator it = dict.getWordsIterator(); - TermFreqIterator tfit; - if (it instanceof TermFreqIterator) { - tfit = (TermFreqIterator)it; - } else { - tfit = new TermFreqIterator.TermFreqIteratorWrapper(it); - } - build(tfit); - } - - protected abstract void build(TermFreqIterator tfit) throws IOException; - - /** - * Persist the constructed lookup data to a directory. Optional operation. - * @param storeDir directory where data can be stored. - * @return true if successful, false if unsuccessful or not supported. - * @throws IOException when fatal IO error occurs. - */ - public abstract boolean store(File storeDir) throws IOException; - - /** - * Discard current lookup data and load it from a previously saved copy. - * Optional operation. - * @param storeDir directory where lookup data was stored. - * @return true if completed successfully, false if unsuccessful or not supported. - * @throws IOException when fatal IO error occurs. 
- */ - public abstract boolean load(File storeDir) throws IOException; - - /** - * Look up a key and return possible completion for this key. - * @param key lookup key. Depending on the implementation this may be - * a prefix, misspelling, or even infix. - * @param onlyMorePopular return only more popular results - * @param num maximum number of results to return - * @return a list of possible completions, with their relative weight (e.g. popularity) - */ - public abstract List lookup(String key, boolean onlyMorePopular, int num); - - /** - * Modify the lookup data by recording additional data. Optional operation. - * @param key new lookup key - * @param value value to associate with this key - * @return true if new key is added, false if it already exists or operation - * is not supported. - */ - public abstract boolean add(String key, Object value); - - /** - * Get value associated with a specific key. - * @param key lookup key - * @return associated value - */ - public abstract Object get(String key); +/** + * Suggester factory for creating {@link Lookup} instances: {@link Suggester} instantiates the configured subclass by name and calls {@link #create(NamedList, SolrCore)} with its configuration and the {@link SolrCore}. 
+ */ +public abstract class LookupFactory { + public abstract Lookup create(NamedList params, SolrCore core); } Index: solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/Suggester.java =================================================================== --- solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (revision 1126054) +++ solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (working copy) @@ -27,15 +27,20 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.spell.HighFrequencyDictionary; +import org.apache.lucene.search.suggest.FileDictionary; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Lookup.LookupResult; + import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.spelling.SolrSpellChecker; import org.apache.solr.spelling.SpellingOptions; import org.apache.solr.spelling.SpellingResult; -import org.apache.solr.spelling.suggest.Lookup.LookupResult; -import org.apache.solr.spelling.suggest.jaspell.JaspellLookup; -import org.apache.solr.util.HighFrequencyDictionary; +import org.apache.solr.spelling.suggest.fst.FSTLookupFactory; +import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory; +import org.apache.solr.spelling.suggest.tst.TSTLookupFactory; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -80,11 +85,18 @@ sourceLocation = (String) config.get(LOCATION); field = (String)config.get(FIELD); lookupImpl = (String)config.get(LOOKUP_IMPL); - if (lookupImpl == null) { - lookupImpl = JaspellLookup.class.getName(); + + // support the old classnames without -Factory for config file backwards compatibility. + if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) { + lookupImpl = JaspellLookupFactory.class.getName(); + } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) { + lookupImpl = TSTLookupFactory.class.getName(); + } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) { + lookupImpl = FSTLookupFactory.class.getName(); } - lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl); - lookup.init(config, core); + + LookupFactory factory = (LookupFactory) core.getResourceLoader().newInstance(lookupImpl); + lookup = factory.create(config, core); String store = (String)config.get(STORE_DIR); if (store != null) { storeDir = new File(store); Index: solr/src/java/org/apache/solr/spelling/suggest/Lookup.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java =================================================================== --- solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java (revision 1126054) +++ solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java (working copy) @@ -1,182 +1,39 @@ package org.apache.solr.spelling.suggest.jaspell; 
-import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.jaspell.JaspellLookup; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; -import org.apache.solr.spelling.suggest.Lookup; -import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper; -import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; -import org.apache.solr.util.SortedIterator; -import org.apache.solr.util.TermFreqIterator; +import org.apache.solr.spelling.suggest.LookupFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class JaspellLookup extends Lookup { +/** + * Factory for {@link JaspellLookup}; {@link #create} logs the supplied parameters and returns a new instance built with the no-argument constructor. + */ +public class JaspellLookupFactory extends LookupFactory { - private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class); + private static final Logger LOG = LoggerFactory.getLogger(JaspellLookupFactory.class); // log under the factory's own category, not the Lucene lookup class - JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); - private boolean usePrefix = true; - private int editDistance = 2; @Override - public void init(NamedList config, SolrCore core) { - LOG.info("init: " + config); + public Lookup create(NamedList params, SolrCore core) { + LOG.info("init: " + params); + return new JaspellLookup(); } - - @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof SortedIterator) { - // make sure it's unsorted - tfit = new UnsortedTermFreqIteratorWrapper(tfit); - } - trie = new JaspellTernarySearchTrie(); - trie.setMatchAlmostDiff(editDistance); - while (tfit.hasNext()) { - String key = tfit.next(); - float freq = tfit.freq(); - if (key.length() == 0) { - continue; - } - trie.put(key, new Float(freq)); - } - } - - @Override - public boolean add(String key, Object value) { - trie.put(key, value); - // XXX - return false; - } - - @Override - public Object get(String key) { - return trie.get(key); - } - - @Override - public List lookup(String key, boolean onlyMorePopular, int num) { - List res = new ArrayList(); - List list; - int count = onlyMorePopular ? 
num * 2 : num; - if (usePrefix) { - list = trie.matchPrefix(key, count); - } else { - list = trie.matchAlmost(key, count); - } - if (list == null || list.size() == 0) { - return res; - - } - int maxCnt = Math.min(num, list.size()); - if (onlyMorePopular) { - LookupPriorityQueue queue = new LookupPriorityQueue(num); - for (String s : list) { - float freq = (Float)trie.get(s); - queue.insertWithOverflow(new LookupResult(s, freq)); - } - for (LookupResult lr : queue.getResults()) { - res.add(lr); - } - } else { - for (int i = 0; i < maxCnt; i++) { - String s = list.get(i); - float freq = (Float)trie.get(s); - res.add(new LookupResult(s, freq)); - } - } - return res; - } - - public static final String FILENAME = "jaspell.dat"; - private static final byte LO_KID = 0x01; - private static final byte EQ_KID = 0x02; - private static final byte HI_KID = 0x04; - private static final byte HAS_VALUE = 0x08; - - - @Override - public boolean load(File storeDir) throws IOException { - File data = new File(storeDir, FILENAME); - if (!data.exists() || !data.canRead()) { - return false; - } - DataInputStream in = new DataInputStream(new FileInputStream(data)); - TSTNode root = trie.new TSTNode('\0', null); - try { - readRecursively(in, root); - trie.setRoot(root); - } finally { - in.close(); - } - return true; - } - - private void readRecursively(DataInputStream in, TSTNode node) throws IOException { - node.splitchar = in.readChar(); - byte mask = in.readByte(); - if ((mask & HAS_VALUE) != 0) { - node.data = new Float(in.readFloat()); - } - if ((mask & LO_KID) != 0) { - TSTNode kid = trie.new TSTNode('\0', node); - node.relatives[TSTNode.LOKID] = kid; - readRecursively(in, kid); - } - if ((mask & EQ_KID) != 0) { - TSTNode kid = trie.new TSTNode('\0', node); - node.relatives[TSTNode.EQKID] = kid; - readRecursively(in, kid); - } - if ((mask & HI_KID) != 0) { - TSTNode kid = trie.new TSTNode('\0', node); - node.relatives[TSTNode.HIKID] = kid; - readRecursively(in, kid); - } - } - - 
@Override - public boolean store(File storeDir) throws IOException { - if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) { - return false; - } - TSTNode root = trie.getRoot(); - if (root == null) { // empty tree - return false; - } - File data = new File(storeDir, FILENAME); - DataOutputStream out = new DataOutputStream(new FileOutputStream(data)); - try { - writeRecursively(out, root); - out.flush(); - } finally { - out.close(); - } - return true; - } - - private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException { - if (node == null) { - return; - } - out.writeChar(node.splitchar); - byte mask = 0; - if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID; - if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID; - if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID; - if (node.data != null) mask |= HAS_VALUE; - out.writeByte(mask); - if (node.data != null) { - out.writeFloat((Float)node.data); - } - writeRecursively(out, node.relatives[TSTNode.LOKID]); - writeRecursively(out, node.relatives[TSTNode.EQKID]); - writeRecursively(out, node.relatives[TSTNode.HIKID]); - } } Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java =================================================================== --- solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java (revision 0) +++ solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java (revision 0) @@ -0,0 +1,35 @@ +package 
org.apache.solr.spelling.suggest.tst; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.tst.TSTLookup; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.spelling.suggest.LookupFactory; + +/** + * Factory for {@link TSTLookup}: {@link #create} ignores both of its arguments and returns a new {@code TSTLookup} built with the no-argument constructor. + */ +public class TSTLookupFactory extends LookupFactory { + + @Override + public Lookup create(NamedList params, SolrCore core) { + return new TSTLookup(); + } +} Index: solr/src/java/org/apache/solr/spelling/suggest/fst/InputStreamDataInput.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookup.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/fst/OutputStreamDataOutput.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java =================================================================== --- 
solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java (revision 0)
+++ solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java (revision 0)
@@ -0,0 +1,72 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link FSTLookup}
+ */
+public class FSTLookupFactory extends LookupFactory {
+
+  /**
+   * The number of separate buckets for weights (discretization). The more buckets,
+   * the more fine-grained term weights (priorities) can be assigned. The speed of lookup
+   * will not decrease for prefixes which have highly-weighted completions (because these
+   * are filled-in first), but will decrease significantly for low-weighted terms (but
+   * these should be infrequent, so it is all right).
+   *
+   * <p>The number of buckets must be within [1, 255] range.
+   */
+  public static final String WEIGHT_BUCKETS = "weightBuckets";
+
+  /**
+   * If true, exact suggestions are returned first, even if they are prefixes
+   * of other strings in the automaton (possibly with larger weights).
+   */
+  public static final String EXACT_MATCH_FIRST = "exactMatchFirst";
+
+  /**
+   * Creates an {@link FSTLookup} configured from {@code params}.
+   *
+   * @param params configuration; {@link #WEIGHT_BUCKETS} (default 10) and
+   *        {@link #EXACT_MATCH_FIRST} (default true) are read from it
+   * @param core not used by this factory
+   * @return a new lookup built with the resolved settings
+   * @throws NumberFormatException if {@link #WEIGHT_BUCKETS} is set but is not an integer
+   */
+  @Override
+  public Lookup create(NamedList params, SolrCore core) {
+    // Fetch each setting once instead of looking it up twice.
+    Object bucketsParam = params.get(WEIGHT_BUCKETS);
+    int buckets = bucketsParam != null
+        ? Integer.parseInt(bucketsParam.toString())
+        : 10;
+
+    Object exactParam = params.get(EXACT_MATCH_FIRST);
+    // Absent means true; parseBoolean avoids the Boolean box/unbox of valueOf.
+    boolean exactMatchFirst = exactParam == null
+        || Boolean.parseBoolean(exactParam.toString());
+
+    return new FSTLookup(buckets, exactMatchFirst);
+  }
+}
Index: solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java =================================================================== --- solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (revision 1126054) +++ solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (working copy) @@ -26,12 +26,12 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.search.spell.HighFrequencyDictionary; import org.apache.lucene.search.spell.PlainTextDictionary; import org.apache.lucene.store.RAMDirectory; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.schema.FieldType; -import org.apache.solr.util.HighFrequencyDictionary; import org.apache.solr.search.SolrIndexSearcher; /** Index: solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java =================================================================== --- solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java (revision 1126054) +++ solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java (working copy) @@ -18,10 +18,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.store.FSDirectory; +import 
org.apache.lucene.search.spell.HighFrequencyDictionary; + import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.search.SolrIndexSearcher; -import org.apache.solr.util.HighFrequencyDictionary; import java.io.File; import java.io.IOException; Index: solr/src/java/org/apache/solr/util/TermFreqIterator.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/util/SortedIterator.java (deleted) =================================================================== Index: solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (deleted) =================================================================== Index: modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (revision 0) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (working copy) @@ -1,34 +1,51 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.BufferedReader; +import java.io.InputStreamReader; import java.net.URL; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Random; import java.util.concurrent.Callable; +import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.solr.spelling.suggest.fst.FSTLookup; -import org.apache.solr.spelling.suggest.jaspell.JaspellLookup; -import org.apache.solr.spelling.suggest.tst.TSTLookup; -import org.junit.Assert; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.fst.FSTLookup; +import org.apache.lucene.search.suggest.jaspell.JaspellLookup; +import org.apache.lucene.search.suggest.tst.TSTLookup; + import org.junit.BeforeClass; import org.junit.Ignore; -import org.junit.Test; -import com.google.common.base.Charsets; -import com.google.common.base.Function; -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.io.Resources; - /** * Benchmarks tests for implementations of {@link Lookup} interface. */ -@Ignore // COMMENT ME TO RUN BENCHMARKS! -public class LookupBenchmarkTest { +@Ignore("COMMENT ME TO RUN BENCHMARKS!") +public class LookupBenchmarkTest extends LuceneTestCase { @SuppressWarnings("unchecked") - private final List> benchmarkClasses = Lists.newArrayList( + private final List> benchmarkClasses = Arrays.asList( JaspellLookup.class, TSTLookup.class, FSTLookup.class); @@ -63,28 +80,32 @@ LookupBenchmarkTest.benchmarkInput = input; } + static final Charset UTF_8 = Charset.forName("UTF-8"); + /** * Collect the multilingual input for benchmarks/ tests. 
*/ public static List readTop50KWiki() throws Exception { - List input = Lists.newArrayList(); - URL resource = Thread.currentThread().getContextClassLoader().getResource("Top50KWiki.utf8"); + List input = new ArrayList(); + URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8"); assert resource != null : "Resource missing: Top50KWiki.utf8"; - for (String line : Resources.readLines(resource, Charsets.UTF_8)) { + BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8)); + try { // close the reader even if a line fails to parse or the assertion trips + for (String line; (line = br.readLine()) != null; ) { int tab = line.indexOf('|'); - Assert.assertTrue("No | separator?: " + line, tab >= 0); + assertTrue("No | separator?: " + line, tab >= 0); float weight = Float.parseFloat(line.substring(tab + 1)); String key = line.substring(0, tab); input.add(new TermFreq(key, weight)); } + } finally { br.close(); } return input; } /** * Test construction time. */ - @Test public void testConstructionTime() throws Exception { System.err.println("-- construction time"); for (final Class cls : benchmarkClasses) { @@ -106,7 +127,6 @@ /** * Test memory required for the storage. */ - @Test public void testStorageNeeds() throws Exception { System.err.println("-- RAM consumption"); final RamUsageEstimator rue = new RamUsageEstimator(); @@ -131,7 +151,6 @@ /** * Test performance of lookup on full hits. */ - @Test public void testPerformanceOnFullHits() throws Exception { final int minPrefixLen = 100; final int maxPrefixLen = 200; @@ -141,7 +160,6 @@ /** * Test performance of lookup on longer term prefixes (6-9 letters or shorter). */ - @Test public void testPerformanceOnPrefixes6_9() throws Exception { final int minPrefixLen = 6; final int maxPrefixLen = 9; @@ -151,7 +169,6 @@ /** * Test performance of lookup on short term prefixes (2-4 letters or shorter). 
*/ - @Test public void testPerformanceOnPrefixes2_4() throws Exception { final int minPrefixLen = 2; final int maxPrefixLen = 4; @@ -170,12 +187,11 @@ for (Class cls : benchmarkClasses) { final Lookup lookup = buildLookup(cls, dictionaryInput); - final List input = Lists.newArrayList(Iterables.transform(benchmarkInput, new Function() { - public String apply(TermFreq tf) { - return tf.term.substring(0, Math.min(tf.term.length(), - minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))); - } - })); + final List input = new ArrayList(benchmarkInput.size()); + for (TermFreq tf : benchmarkInput) { + input.add(tf.term.substring(0, Math.min(tf.term.length(), + minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)))); + } BenchmarkResult result = measure(new Callable() { public Integer call() throws Exception { @@ -203,7 +219,7 @@ final double NANOS_PER_MS = 1000000; try { - List times = Lists.newArrayList(); + List times = new ArrayList(); for (int i = 0; i < warmup + rounds; i++) { final long start = System.nanoTime(); guard = callable.call().intValue(); Index: modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (revision 0) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (working copy) @@ -14,17 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; import java.io.File; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.spelling.suggest.fst.FSTLookup; -import org.apache.solr.spelling.suggest.jaspell.JaspellLookup; -import org.apache.solr.spelling.suggest.tst.TSTLookup; -import org.junit.Test; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.fst.FSTLookup; +import org.apache.lucene.search.suggest.jaspell.JaspellLookup; +import org.apache.lucene.search.suggest.tst.TSTLookup; +import org.apache.lucene.util.LuceneTestCase; -public class PersistenceTest extends SolrTestCaseJ4 { +public class PersistenceTest extends LuceneTestCase { public final String[] keys = new String[] { "one", "two", @@ -42,17 +42,14 @@ "fourier", "fourty"}; - @Test public void testTSTPersistence() throws Exception { runTest(TSTLookup.class, true); } - @Test public void testJaspellPersistence() throws Exception { runTest(JaspellLookup.class, true); } - @Test public void testFSTPersistence() throws Exception { runTest(FSTLookup.class, false); } @@ -68,7 +65,7 @@ lookup.build(new TermFreqArrayIterator(keys)); // Store the suggester. - File storeDir = new File(TEST_HOME()); + File storeDir = TEMP_DIR; lookup.store(storeDir); // Re-read it from disk. Index: modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java (revision 0) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java (working copy) @@ -1,5 +1,22 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + public final class TermFreq { public final String term; public final float v; Index: modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java (revision 0) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java (working copy) @@ -1,5 +1,23 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.util.List; import java.util.Locale; Index: modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java (revision 1126054) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java (working copy) @@ -1,20 +1,35 @@ -package org.apache.solr.spelling.suggest.fst; +package org.apache.lucene.search.suggest.fst; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Locale; import java.util.Random; +import org.apache.lucene.search.suggest.Lookup.LookupResult; +import org.apache.lucene.search.suggest.fst.FSTLookup; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.spelling.suggest.Lookup.LookupResult; -import org.apache.solr.spelling.suggest.LookupBenchmarkTest; -import org.apache.solr.spelling.suggest.TermFreq; -import org.apache.solr.spelling.suggest.TermFreqArrayIterator; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; -import com.google.common.collect.Lists; +import org.apache.lucene.search.suggest.LookupBenchmarkTest; +import org.apache.lucene.search.suggest.TermFreq; +import org.apache.lucene.search.suggest.TermFreqArrayIterator; /** * Unit tests for {@link FSTLookup}. @@ -26,8 +41,8 @@ private FSTLookup lookup; - @Before - public void prepare() throws Exception { + public void setUp() throws Exception { + super.setUp(); final TermFreq[] keys = new TermFreq[] { tf("one", 0.5f), tf("oneness", 1), @@ -51,29 +66,24 @@ lookup.build(new TermFreqArrayIterator(keys)); } - @Test public void testExactMatchHighPriority() throws Exception { assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0"); } - @Test public void testExactMatchLowPriority() throws Exception { assertMatchEquals(lookup.lookup("one", true, 2), "one/0.0", "oneness/1.0"); } - @Test public void testMiss() throws Exception { assertMatchEquals(lookup.lookup("xyz", true, 1)); } - @Test public void testAlphabeticWithWeights() throws Exception { assertEquals(0, lookup.lookup("xyz", false, 1).size()); } - @Test public void testFullMatchList() throws Exception { assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE), "oneness/1.0", @@ -82,7 +92,6 @@ "one/0.0"); } - @Test public void testMultilingualInput() throws Exception { List input = LookupBenchmarkTest.readTop50KWiki(); @@ -95,7 +104,6 @@ } } - @Test public 
void testEmptyInput() throws Exception { lookup = new FSTLookup(); lookup.build(new TermFreqArrayIterator(new TermFreq[0])); @@ -103,9 +111,8 @@ assertMatchEquals(lookup.lookup("", true, 10)); } - @Test public void testRandom() throws Exception { - List freqs = Lists.newArrayList(); + List freqs = new ArrayList(); Random rnd = random; for (int i = 0; i < 5000; i++) { freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100))); @@ -118,7 +125,7 @@ for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); for (LookupResult lr : lookup.lookup(prefix, true, 10)) { - Assert.assertTrue(lr.key.startsWith(prefix)); + assertTrue(lr.key.startsWith(prefix)); } } } Index: modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java =================================================================== --- modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java (revision 0) +++ modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java (working copy) @@ -1,9 +1,26 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + import java.util.Arrays; import java.util.Iterator; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqIterator; /** * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s. Index: modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.util; +package org.apache.lucene.search.spell; import java.util.Iterator; @@ -7,9 +7,9 @@ public float freq(); public static class TermFreqIteratorWrapper implements TermFreqIterator { - private Iterator wrapped; + private Iterator wrapped; - public TermFreqIteratorWrapper(Iterator wrapped) { + public TermFreqIteratorWrapper(Iterator wrapped) { this.wrapped = wrapped; } Index: modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.util; +package org.apache.lucene.search.spell; import java.util.Iterator; Index: modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (working copy) @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.solr.util; +package org.apache.lucene.search.spell; import java.io.IOException; import java.util.Iterator; @@ -49,7 +49,7 @@ this.thresh = thresh; } - public final Iterator getWordsIterator() { + public final Iterator getWordsIterator() { return new HighFrequencyIterator(); } Index: modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (working copy) @@ -1,9 +1,9 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; import java.util.Collections; -import org.apache.solr.util.SortedIterator; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.spell.SortedIterator; +import org.apache.lucene.search.spell.TermFreqIterator; /** * This wrapper buffers incoming elements and makes sure they are sorted in Index: modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (working copy) @@ -1,8 +1,8 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; import java.util.Collections; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqIterator; /** * This wrapper buffers the incoming elements and makes sure they are in Index: modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java =================================================================== --- 
modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -21,7 +21,7 @@ import java.io.*; import org.apache.lucene.search.spell.Dictionary; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqIterator; /** Index: modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (working copy) @@ -1,10 +1,10 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; import java.util.ArrayList; import java.util.List; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqIterator; /** * This wrapper buffers incoming elements. 
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest; +package org.apache.lucene.search.suggest; import java.io.File; import java.io.IOException; @@ -6,10 +6,8 @@ import java.util.List; import org.apache.lucene.search.spell.Dictionary; +import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.PriorityQueue; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.util.TermFreqIterator; public abstract class Lookup { /** @@ -56,9 +54,6 @@ } } - /** Initialize the lookup. */ - public abstract void init(NamedList config, SolrCore core); - /** Build lookup from a dictionary. Some implementations may require sorted * or unsorted keys from the dictionary's iterator - use * {@link SortedTermFreqIteratorWrapper} or @@ -75,7 +70,7 @@ build(tfit); } - protected abstract void build(TermFreqIterator tfit) throws IOException; + public abstract void build(TermFreqIterator tfit) throws IOException; /** * Persist the constructed lookup data to a directory. Optional operation. 
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.jaspell; +package org.apache.lucene.search.suggest.jaspell; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -9,28 +9,18 @@ import java.util.ArrayList; import java.util.List; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.spelling.suggest.Lookup; -import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper; -import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; -import org.apache.solr.util.SortedIterator; -import org.apache.solr.util.TermFreqIterator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.lucene.search.spell.SortedIterator; +import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; public class JaspellLookup extends Lookup { - private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class); JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); private boolean usePrefix = true; private int editDistance = 2; @Override - public void init(NamedList config, SolrCore core) { - LOG.info("init: " + config); - } - - @Override public void build(TermFreqIterator tfit) throws IOException { if (tfit instanceof SortedIterator) { // make sure it's unsorted Index: modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java 
=================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.jaspell; +package org.apache.lucene.search.suggest.jaspell; /** * Copyright (c) 2005 Bruno Martins Index: modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.tst; +package org.apache.lucene.search.suggest.tst; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -9,22 +9,16 @@ import java.util.ArrayList; import java.util.List; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.spelling.suggest.Lookup; -import org.apache.solr.spelling.suggest.SortedTermFreqIteratorWrapper; -import org.apache.solr.util.SortedIterator; -import org.apache.solr.util.TermFreqIterator; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; +import org.apache.lucene.search.spell.SortedIterator; +import org.apache.lucene.search.spell.TermFreqIterator; public class TSTLookup extends Lookup { TernaryTreeNode root = new TernaryTreeNode(); TSTAutocomplete autocomplete = new TSTAutocomplete(); @Override - public void init(NamedList config, SolrCore core) { - } - - @Override public void build(TermFreqIterator tfit) throws IOException { root = new TernaryTreeNode(); // buffer first Index: 
modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.tst; +package org.apache.lucene.search.suggest.tst; /** * The class creates a TST node. Index: modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.tst; +package org.apache.lucene.search.suggest.tst; import java.util.*; Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java (working copy) @@ -1,10 +1,9 @@ -package org.apache.solr.spelling.suggest.fst; +package org.apache.lucene.search.suggest.fst; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import org.apache.lucene.store.DataInput; -import com.google.common.io.ByteStreams; /** * A {@link DataInput} wrapping a plain {@link InputStream}. 
@@ -26,6 +25,14 @@ @Override public void readBytes(byte[] b, int offset, int len) throws IOException { - ByteStreams.readFully(is, b, offset, len); + /* InputStream.read may legally return fewer bytes than requested without being at end of stream, so keep reading until the whole range is filled; only a negative return value means EOF. */ + while (len > 0) { + final int cnt = is.read(b, offset, len); + if (cnt < 0) { + throw new EOFException(); + } + len -= cnt; + offset += cnt; + } } } Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.fst; +package org.apache.lucene.search.suggest.fst; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -13,20 +13,17 @@ import java.util.Comparator; import java.util.List; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.automaton.fst.Builder; import org.apache.lucene.util.automaton.fst.FST; import org.apache.lucene.util.automaton.fst.FST.Arc; import org.apache.lucene.util.automaton.fst.NoOutputs; import org.apache.lucene.util.automaton.fst.Outputs; -import org.apache.solr.common.util.NamedList; -import org.apache.solr.core.SolrCore; -import org.apache.solr.spelling.suggest.Lookup; -import org.apache.solr.spelling.suggest.tst.TSTLookup; -import org.apache.solr.util.TermFreqIterator; -import com.google.common.collect.Lists; -import com.google.common.io.Closeables; +import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.tst.TSTLookup; +import org.apache.lucene.search.spell.TermFreqIterator; /** * Finite state automata based implementation of {@link Lookup} query @@ -93,6 +90,16 @@ * nothing else. 
*/ public class FSTLookup extends Lookup { + + public FSTLookup() { + this(10, true); + } + + public FSTLookup(int buckets, boolean exactMatchFirst) { + this.buckets = buckets; + this.exactMatchFirst = exactMatchFirst; + } + /** A structure for a single entry (for sorting/ preprocessing). */ private static class Entry { char [] term; @@ -104,6 +111,12 @@ } } + /** Serialized automaton file name (storage). */ + public static final String FILENAME = "fst.dat"; + + /** An empty result. */ + private static final List EMPTY_RESULT = Collections.emptyList(); + /** * The number of separate buckets for weights (discretization). The more buckets, * the more fine-grained term weights (priorities) can be assigned. The speed of lookup @@ -113,31 +126,15 @@ * *

The number of buckets must be within [1, 255] range. */ - public static final String WEIGHT_BUCKETS = "weightBuckets"; + private final int buckets; /** * If true, exact suggestions are returned first, even if they are prefixes * of other strings in the automaton (possibly with larger weights). */ - public static final String EXACT_MATCH_FIRST = "exactMatchFirst"; + private final boolean exactMatchFirst; - /** Serialized automaton file name (storage). */ - public static final String FILENAME = "fst.dat"; - - /** An empty result. */ - private static final List EMPTY_RESULT = Lists.newArrayList(); - /** - * @see #WEIGHT_BUCKETS - */ - private int buckets = 10; - - /** - * #see #EXACT_MATCH_FIRST - */ - private boolean exactMatchFirst = true; - - /** * Finite state automaton encoding all the lookup terms. See class * notes for details. */ @@ -151,23 +148,10 @@ /* */ @Override - @SuppressWarnings("rawtypes") - public void init(NamedList config, SolrCore core) { - this.buckets = config.get(WEIGHT_BUCKETS) != null - ? Integer.parseInt(config.get(WEIGHT_BUCKETS).toString()) - : 10; - - this.exactMatchFirst = config.get(EXACT_MATCH_FIRST) != null - ? Boolean.valueOf(config.get(EXACT_MATCH_FIRST).toString()) - : true; - } - - /* */ - @Override public void build(TermFreqIterator tfit) throws IOException { // Buffer the input because we will need it twice: for calculating // weights distribution and for the actual automata building. - List entries = Lists.newArrayList(); + List entries = new ArrayList(); while (tfit.hasNext()) { String term = tfit.next(); char [] termChars = new char [term.length() + 1]; // add padding for weight. 
@@ -200,7 +184,7 @@ @SuppressWarnings("unchecked") private void cacheRootArcs() throws IOException { if (automaton != null) { - List> rootArcs = Lists.newArrayList(); + List> rootArcs = new ArrayList>(); Arc arc = automaton.getFirstArc(new Arc()); automaton.readFirstTargetArc(arc, arc); while (true) { @@ -526,7 +510,7 @@ this.automaton = new FST(new InputStreamDataInput(is), NoOutputs.getSingleton()); cacheRootArcs(); } finally { - Closeables.closeQuietly(is); + IOUtils.closeSafely(is); } return true; } @@ -548,7 +532,7 @@ try { this.automaton.save(new OutputStreamDataOutput(os)); } finally { - Closeables.closeQuietly(os); + IOUtils.closeSafely(os); } return true; Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/OutputStreamDataOutput.java =================================================================== --- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/OutputStreamDataOutput.java (revision 1126054) +++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/OutputStreamDataOutput.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.solr.spelling.suggest.fst; +package org.apache.lucene.search.suggest.fst; import java.io.IOException; import java.io.OutputStream; Index: modules/suggest/build.xml =================================================================== --- modules/suggest/build.xml (revision 1126054) +++ modules/suggest/build.xml (working copy) @@ -17,13 +17,17 @@ limitations under the License. 
--> - + - Spell Checker + Suggest + + + + - + Index: modules/build.xml =================================================================== --- modules/build.xml (revision 1126054) +++ modules/build.xml (working copy) @@ -25,6 +25,7 @@ + @@ -35,6 +36,7 @@ + @@ -45,6 +47,7 @@ + @@ -55,6 +58,7 @@ + @@ -66,6 +70,7 @@ + @@ -96,6 +101,7 @@ + Index: lucene/build.xml =================================================================== --- lucene/build.xml (revision 1126054) +++ lucene/build.xml (working copy) @@ -231,7 +231,6 @@ - @@ -256,7 +255,6 @@ - Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestDirectSpellChecker.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLevenshteinDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestNGramDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestLuceneDictionary.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestJaroWinklerDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestPlainTextDictionary.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (deleted) =================================================================== Index: 
lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/NGramDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LuceneDictionary.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/StringDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordScoreComparator.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/JaroWinklerDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordQueue.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/Dictionary.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/LevensteinDistance.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWordFrequencyComparator.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SuggestWord.java (deleted) =================================================================== Index: 
lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/package.html (deleted) =================================================================== Index: lucene/contrib/spellchecker/src/java/overview.html (deleted) =================================================================== Index: lucene/contrib/spellchecker/build.xml (deleted) =================================================================== Index: dev-tools/eclipse/dot.classpath =================================================================== --- dev-tools/eclipse/dot.classpath (revision 1126054) +++ dev-tools/eclipse/dot.classpath (working copy) @@ -29,8 +29,6 @@ - - @@ -55,6 +53,8 @@ + +