Index: solr/common-build.xml
===================================================================
--- solr/common-build.xml (revision 1126054)
+++ solr/common-build.xml (working copy)
@@ -188,12 +188,12 @@
+
-
@@ -204,12 +204,12 @@
+
-
@@ -226,6 +226,9 @@
+
+
+
@@ -241,9 +244,6 @@
-
-
-
@@ -252,12 +252,12 @@
+
-
Index: solr/src/test/org/apache/solr/spelling/suggest/LookupBenchmarkTest.java (deleted)
===================================================================
Index: solr/src/test/org/apache/solr/spelling/suggest/PersistenceTest.java (deleted)
===================================================================
Index: solr/src/test/org/apache/solr/spelling/suggest/TermFreq.java (deleted)
===================================================================
Index: solr/src/test/org/apache/solr/spelling/suggest/Average.java (deleted)
===================================================================
Index: solr/src/test/org/apache/solr/spelling/suggest/fst/FSTLookupTest.java (deleted)
===================================================================
Index: solr/src/test/org/apache/solr/spelling/suggest/TermFreqArrayIterator.java (deleted)
===================================================================
Index: solr/src/test-files/Top50KWiki.utf8 (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/SortedTermFreqIteratorWrapper.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java (revision 1126054)
+++ solr/src/java/org/apache/solr/spelling/suggest/LookupFactory.java (working copy)
@@ -1,122 +1,29 @@
package org.apache.solr.spelling.suggest;
-import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.List;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
-import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.search.suggest.Lookup;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.util.TermFreqIterator;
-public abstract class Lookup {
- /**
- * Result of a lookup.
- */
- public static final class LookupResult implements Comparable {
- public final String key;
- public final float value;
-
- public LookupResult(String key, float value) {
- this.key = key;
- this.value = value;
- }
-
- @Override
- public String toString() {
- return key + "/" + value;
- }
-
- /** Compare alphabetically. */
- public int compareTo(LookupResult o) {
- return this.key.compareTo(o.key);
- }
- }
-
- public static final class LookupPriorityQueue extends PriorityQueue {
-
- public LookupPriorityQueue(int size) {
- super(size);
- }
-
- @Override
- protected boolean lessThan(LookupResult a, LookupResult b) {
- return a.value < b.value;
- }
-
- public LookupResult[] getResults() {
- int size = size();
- LookupResult[] res = new LookupResult[size];
- for (int i = size - 1; i >= 0; i--) {
- res[i] = pop();
- }
- return res;
- }
- }
-
- /** Initialize the lookup. */
- public abstract void init(NamedList config, SolrCore core);
-
- /** Build lookup from a dictionary. Some implementations may require sorted
- * or unsorted keys from the dictionary's iterator - use
- * {@link SortedTermFreqIteratorWrapper} or
- * {@link UnsortedTermFreqIteratorWrapper} in such case.
- */
- public void build(Dictionary dict) throws IOException {
- Iterator it = dict.getWordsIterator();
- TermFreqIterator tfit;
- if (it instanceof TermFreqIterator) {
- tfit = (TermFreqIterator)it;
- } else {
- tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
- }
- build(tfit);
- }
-
- protected abstract void build(TermFreqIterator tfit) throws IOException;
-
- /**
- * Persist the constructed lookup data to a directory. Optional operation.
- * @param storeDir directory where data can be stored.
- * @return true if successful, false if unsuccessful or not supported.
- * @throws IOException when fatal IO error occurs.
- */
- public abstract boolean store(File storeDir) throws IOException;
-
- /**
- * Discard current lookup data and load it from a previously saved copy.
- * Optional operation.
- * @param storeDir directory where lookup data was stored.
- * @return true if completed successfully, false if unsuccessful or not supported.
- * @throws IOException when fatal IO error occurs.
- */
- public abstract boolean load(File storeDir) throws IOException;
-
- /**
- * Look up a key and return possible completion for this key.
- * @param key lookup key. Depending on the implementation this may be
- * a prefix, misspelling, or even infix.
- * @param onlyMorePopular return only more popular results
- * @param num maximum number of results to return
- * @return a list of possible completions, with their relative weight (e.g. popularity)
- */
- public abstract List lookup(String key, boolean onlyMorePopular, int num);
-
- /**
- * Modify the lookup data by recording additional data. Optional operation.
- * @param key new lookup key
- * @param value value to associate with this key
- * @return true if new key is added, false if it already exists or operation
- * is not supported.
- */
- public abstract boolean add(String key, Object value);
-
- /**
- * Get value associated with a specific key.
- * @param key lookup key
- * @return associated value
- */
- public abstract Object get(String key);
+/**
+ * Suggester factory for creating {@link Lookup} instances.
+ */
+public abstract class LookupFactory {
+ public abstract Lookup create(NamedList params, SolrCore core);
}
Index: solr/src/java/org/apache/solr/spelling/suggest/UnsortedTermFreqIteratorWrapper.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/FileDictionary.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/BufferingTermFreqIteratorWrapper.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/Suggester.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (revision 1126054)
+++ solr/src/java/org/apache/solr/spelling/suggest/Suggester.java (working copy)
@@ -27,15 +27,20 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
+import org.apache.lucene.search.suggest.FileDictionary;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
+
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.util.HighFrequencyDictionary;
+import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
+import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
+import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -80,11 +85,18 @@
sourceLocation = (String) config.get(LOCATION);
field = (String)config.get(FIELD);
lookupImpl = (String)config.get(LOOKUP_IMPL);
- if (lookupImpl == null) {
- lookupImpl = JaspellLookup.class.getName();
+
+ // support the old classnames without -Factory for config file backwards compatibility.
+ if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) {
+ lookupImpl = JaspellLookupFactory.class.getName();
+ } else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
+ lookupImpl = TSTLookupFactory.class.getName();
+ } else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
+ lookupImpl = FSTLookupFactory.class.getName();
}
- lookup = (Lookup) core.getResourceLoader().newInstance(lookupImpl);
- lookup.init(config, core);
+
+ LookupFactory factory = (LookupFactory) core.getResourceLoader().newInstance(lookupImpl);
+ lookup = factory.create(config, core);
String store = (String)config.get(STORE_DIR);
if (store != null) {
storeDir = new File(store);
Index: solr/src/java/org/apache/solr/spelling/suggest/Lookup.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookup.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellTernarySearchTrie.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java (revision 1126054)
+++ solr/src/java/org/apache/solr/spelling/suggest/jaspell/JaspellLookupFactory.java (working copy)
@@ -1,182 +1,39 @@
package org.apache.solr.spelling.suggest.jaspell;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.spelling.suggest.Lookup;
-import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
-import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
-import org.apache.solr.util.SortedIterator;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.solr.spelling.suggest.LookupFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-public class JaspellLookup extends Lookup {
+/**
+ * Factory for {@link JaspellLookup}
+ */
+public class JaspellLookupFactory extends LookupFactory {
private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
- JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
- private boolean usePrefix = true;
- private int editDistance = 2;
@Override
- public void init(NamedList config, SolrCore core) {
- LOG.info("init: " + config);
+ public Lookup create(NamedList params, SolrCore core) {
+ LOG.info("init: " + params);
+ return new JaspellLookup();
}
-
- @Override
- public void build(TermFreqIterator tfit) throws IOException {
- if (tfit instanceof SortedIterator) {
- // make sure it's unsorted
- tfit = new UnsortedTermFreqIteratorWrapper(tfit);
- }
- trie = new JaspellTernarySearchTrie();
- trie.setMatchAlmostDiff(editDistance);
- while (tfit.hasNext()) {
- String key = tfit.next();
- float freq = tfit.freq();
- if (key.length() == 0) {
- continue;
- }
- trie.put(key, new Float(freq));
- }
- }
-
- @Override
- public boolean add(String key, Object value) {
- trie.put(key, value);
- // XXX
- return false;
- }
-
- @Override
- public Object get(String key) {
- return trie.get(key);
- }
-
- @Override
- public List lookup(String key, boolean onlyMorePopular, int num) {
- List res = new ArrayList();
- List list;
- int count = onlyMorePopular ? num * 2 : num;
- if (usePrefix) {
- list = trie.matchPrefix(key, count);
- } else {
- list = trie.matchAlmost(key, count);
- }
- if (list == null || list.size() == 0) {
- return res;
-
- }
- int maxCnt = Math.min(num, list.size());
- if (onlyMorePopular) {
- LookupPriorityQueue queue = new LookupPriorityQueue(num);
- for (String s : list) {
- float freq = (Float)trie.get(s);
- queue.insertWithOverflow(new LookupResult(s, freq));
- }
- for (LookupResult lr : queue.getResults()) {
- res.add(lr);
- }
- } else {
- for (int i = 0; i < maxCnt; i++) {
- String s = list.get(i);
- float freq = (Float)trie.get(s);
- res.add(new LookupResult(s, freq));
- }
- }
- return res;
- }
-
- public static final String FILENAME = "jaspell.dat";
- private static final byte LO_KID = 0x01;
- private static final byte EQ_KID = 0x02;
- private static final byte HI_KID = 0x04;
- private static final byte HAS_VALUE = 0x08;
-
-
- @Override
- public boolean load(File storeDir) throws IOException {
- File data = new File(storeDir, FILENAME);
- if (!data.exists() || !data.canRead()) {
- return false;
- }
- DataInputStream in = new DataInputStream(new FileInputStream(data));
- TSTNode root = trie.new TSTNode('\0', null);
- try {
- readRecursively(in, root);
- trie.setRoot(root);
- } finally {
- in.close();
- }
- return true;
- }
-
- private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
- node.splitchar = in.readChar();
- byte mask = in.readByte();
- if ((mask & HAS_VALUE) != 0) {
- node.data = new Float(in.readFloat());
- }
- if ((mask & LO_KID) != 0) {
- TSTNode kid = trie.new TSTNode('\0', node);
- node.relatives[TSTNode.LOKID] = kid;
- readRecursively(in, kid);
- }
- if ((mask & EQ_KID) != 0) {
- TSTNode kid = trie.new TSTNode('\0', node);
- node.relatives[TSTNode.EQKID] = kid;
- readRecursively(in, kid);
- }
- if ((mask & HI_KID) != 0) {
- TSTNode kid = trie.new TSTNode('\0', node);
- node.relatives[TSTNode.HIKID] = kid;
- readRecursively(in, kid);
- }
- }
-
- @Override
- public boolean store(File storeDir) throws IOException {
- if (!storeDir.exists() || !storeDir.isDirectory() || !storeDir.canWrite()) {
- return false;
- }
- TSTNode root = trie.getRoot();
- if (root == null) { // empty tree
- return false;
- }
- File data = new File(storeDir, FILENAME);
- DataOutputStream out = new DataOutputStream(new FileOutputStream(data));
- try {
- writeRecursively(out, root);
- out.flush();
- } finally {
- out.close();
- }
- return true;
- }
-
- private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
- if (node == null) {
- return;
- }
- out.writeChar(node.splitchar);
- byte mask = 0;
- if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
- if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
- if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
- if (node.data != null) mask |= HAS_VALUE;
- out.writeByte(mask);
- if (node.data != null) {
- out.writeFloat((Float)node.data);
- }
- writeRecursively(out, node.relatives[TSTNode.LOKID]);
- writeRecursively(out, node.relatives[TSTNode.EQKID]);
- writeRecursively(out, node.relatives[TSTNode.HIKID]);
- }
}
Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookup.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TernaryTreeNode.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTAutocomplete.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java (revision 0)
+++ solr/src/java/org/apache/solr/spelling/suggest/tst/TSTLookupFactory.java (revision 0)
@@ -0,0 +1,35 @@
+package org.apache.solr.spelling.suggest.tst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.tst.TSTLookup;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link TSTLookup}
+ */
+public class TSTLookupFactory extends LookupFactory {
+
+ @Override
+ public Lookup create(NamedList params, SolrCore core) {
+ return new TSTLookup();
+ }
+}
Index: solr/src/java/org/apache/solr/spelling/suggest/fst/InputStreamDataInput.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookup.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/fst/OutputStreamDataOutput.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java (revision 0)
+++ solr/src/java/org/apache/solr/spelling/suggest/fst/FSTLookupFactory.java (revision 0)
@@ -0,0 +1,60 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link FSTLookup}
+ */
+public class FSTLookupFactory extends LookupFactory {
+
+ /**
+ * The number of separate buckets for weights (discretization). The more buckets,
+ * the more fine-grained term weights (priorities) can be assigned. The speed of lookup
+ * will not decrease for prefixes which have highly-weighted completions (because these
+ * are filled-in first), but will decrease significantly for low-weighted terms (but
+ * these should be infrequent, so it is all right).
+ *
+ * <p>The number of buckets must be within [1, 255] range.
+ */
+ public static final String WEIGHT_BUCKETS = "weightBuckets";
+
+ /**
+ * If true, exact suggestions are returned first, even if they are prefixes
+ * of other strings in the automaton (possibly with larger weights).
+ */
+ public static final String EXACT_MATCH_FIRST = "exactMatchFirst";
+
+ @Override
+ public Lookup create(NamedList params, SolrCore core) {
+ int buckets = params.get(WEIGHT_BUCKETS) != null
+ ? Integer.parseInt(params.get(WEIGHT_BUCKETS).toString())
+ : 10;
+
+ boolean exactMatchFirst = params.get(EXACT_MATCH_FIRST) != null
+ ? Boolean.valueOf(params.get(EXACT_MATCH_FIRST).toString())
+ : true;
+
+ return new FSTLookup(buckets, exactMatchFirst);
+ }
+}
Index: solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (revision 1126054)
+++ solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java (working copy)
@@ -26,12 +26,12 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.spell.PlainTextDictionary;
import org.apache.lucene.store.RAMDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
-import org.apache.solr.util.HighFrequencyDictionary;
import org.apache.solr.search.SolrIndexSearcher;
/**
Index: solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java (revision 1126054)
+++ solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java (working copy)
@@ -18,10 +18,11 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.search.spell.HighFrequencyDictionary;
+
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.util.HighFrequencyDictionary;
import java.io.File;
import java.io.IOException;
Index: solr/src/java/org/apache/solr/util/TermFreqIterator.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/util/SortedIterator.java (deleted)
===================================================================
Index: solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java (deleted)
===================================================================
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (revision 0)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java (working copy)
@@ -1,34 +1,51 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.Callable;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.solr.spelling.suggest.fst.FSTLookup;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.spelling.suggest.tst.TSTLookup;
-import org.junit.Assert;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
+import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
+import org.apache.lucene.search.suggest.tst.TSTLookup;
+
import org.junit.BeforeClass;
import org.junit.Ignore;
-import org.junit.Test;
-import com.google.common.base.Charsets;
-import com.google.common.base.Function;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import com.google.common.io.Resources;
-
/**
* Benchmarks tests for implementations of {@link Lookup} interface.
*/
-@Ignore // COMMENT ME TO RUN BENCHMARKS!
-public class LookupBenchmarkTest {
+@Ignore("COMMENT ME TO RUN BENCHMARKS!")
+public class LookupBenchmarkTest extends LuceneTestCase {
@SuppressWarnings("unchecked")
- private final List> benchmarkClasses = Lists.newArrayList(
+ private final List> benchmarkClasses = Arrays.asList(
JaspellLookup.class,
TSTLookup.class,
FSTLookup.class);
@@ -63,28 +80,32 @@
LookupBenchmarkTest.benchmarkInput = input;
}
+ static final Charset UTF_8 = Charset.forName("UTF-8");
+
/**
* Collect the multilingual input for benchmarks/ tests.
*/
public static List readTop50KWiki() throws Exception {
- List input = Lists.newArrayList();
- URL resource = Thread.currentThread().getContextClassLoader().getResource("Top50KWiki.utf8");
+ List input = new ArrayList();
+ URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
assert resource != null : "Resource missing: Top50KWiki.utf8";
- for (String line : Resources.readLines(resource, Charsets.UTF_8)) {
+ String line = null;
+ BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
+ while ((line = br.readLine()) != null) {
int tab = line.indexOf('|');
- Assert.assertTrue("No | separator?: " + line, tab >= 0);
+ assertTrue("No | separator?: " + line, tab >= 0);
float weight = Float.parseFloat(line.substring(tab + 1));
String key = line.substring(0, tab);
input.add(new TermFreq(key, weight));
}
+ br.close();
return input;
}
/**
* Test construction time.
*/
- @Test
public void testConstructionTime() throws Exception {
System.err.println("-- construction time");
for (final Class extends Lookup> cls : benchmarkClasses) {
@@ -106,7 +127,6 @@
/**
* Test memory required for the storage.
*/
- @Test
public void testStorageNeeds() throws Exception {
System.err.println("-- RAM consumption");
final RamUsageEstimator rue = new RamUsageEstimator();
@@ -131,7 +151,6 @@
/**
* Test performance of lookup on full hits.
*/
- @Test
public void testPerformanceOnFullHits() throws Exception {
final int minPrefixLen = 100;
final int maxPrefixLen = 200;
@@ -141,7 +160,6 @@
/**
* Test performance of lookup on longer term prefixes (6-9 letters or shorter).
*/
- @Test
public void testPerformanceOnPrefixes6_9() throws Exception {
final int minPrefixLen = 6;
final int maxPrefixLen = 9;
@@ -151,7 +169,6 @@
/**
* Test performance of lookup on short term prefixes (2-4 letters or shorter).
*/
- @Test
public void testPerformanceOnPrefixes2_4() throws Exception {
final int minPrefixLen = 2;
final int maxPrefixLen = 4;
@@ -170,12 +187,11 @@
for (Class extends Lookup> cls : benchmarkClasses) {
final Lookup lookup = buildLookup(cls, dictionaryInput);
- final List input = Lists.newArrayList(Iterables.transform(benchmarkInput, new Function() {
- public String apply(TermFreq tf) {
- return tf.term.substring(0, Math.min(tf.term.length(),
- minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));
- }
- }));
+ final List input = new ArrayList(benchmarkInput.size());
+ for (TermFreq tf : benchmarkInput) {
+ input.add(tf.term.substring(0, Math.min(tf.term.length(),
+ minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))));
+ }
BenchmarkResult result = measure(new Callable() {
public Integer call() throws Exception {
@@ -203,7 +219,7 @@
final double NANOS_PER_MS = 1000000;
try {
- List times = Lists.newArrayList();
+ List times = new ArrayList();
for (int i = 0; i < warmup + rounds; i++) {
final long start = System.nanoTime();
guard = callable.call().intValue();
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (revision 0)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java (working copy)
@@ -14,17 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
import java.io.File;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.spelling.suggest.fst.FSTLookup;
-import org.apache.solr.spelling.suggest.jaspell.JaspellLookup;
-import org.apache.solr.spelling.suggest.tst.TSTLookup;
-import org.junit.Test;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
+import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
+import org.apache.lucene.search.suggest.tst.TSTLookup;
+import org.apache.lucene.util.LuceneTestCase;
-public class PersistenceTest extends SolrTestCaseJ4 {
+public class PersistenceTest extends LuceneTestCase {
public final String[] keys = new String[] {
"one",
"two",
@@ -42,17 +42,14 @@
"fourier",
"fourty"};
- @Test
public void testTSTPersistence() throws Exception {
runTest(TSTLookup.class, true);
}
- @Test
public void testJaspellPersistence() throws Exception {
runTest(JaspellLookup.class, true);
}
- @Test
public void testFSTPersistence() throws Exception {
runTest(FSTLookup.class, false);
}
@@ -68,7 +65,7 @@
lookup.build(new TermFreqArrayIterator(keys));
// Store the suggester.
- File storeDir = new File(TEST_HOME());
+ File storeDir = TEMP_DIR;
lookup.store(storeDir);
// Re-read it from disk.
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java (revision 0)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java (working copy)
@@ -1,5 +1,22 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
public final class TermFreq {
public final String term;
public final float v;
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java (revision 0)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/Average.java (working copy)
@@ -1,5 +1,23 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
import java.util.List;
import java.util.Locale;
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java (revision 1126054)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTLookupTest.java (working copy)
@@ -1,20 +1,35 @@
-package org.apache.solr.spelling.suggest.fst;
+package org.apache.lucene.search.suggest.fst;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Random;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
+import org.apache.lucene.search.suggest.fst.FSTLookup;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.solr.spelling.suggest.Lookup.LookupResult;
-import org.apache.solr.spelling.suggest.LookupBenchmarkTest;
-import org.apache.solr.spelling.suggest.TermFreq;
-import org.apache.solr.spelling.suggest.TermFreqArrayIterator;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import com.google.common.collect.Lists;
+import org.apache.lucene.search.suggest.LookupBenchmarkTest;
+import org.apache.lucene.search.suggest.TermFreq;
+import org.apache.lucene.search.suggest.TermFreqArrayIterator;
/**
* Unit tests for {@link FSTLookup}.
@@ -26,8 +41,8 @@
private FSTLookup lookup;
- @Before
- public void prepare() throws Exception {
+ public void setUp() throws Exception {
+ super.setUp();
final TermFreq[] keys = new TermFreq[] {
tf("one", 0.5f),
tf("oneness", 1),
@@ -51,29 +66,24 @@
lookup.build(new TermFreqArrayIterator(keys));
}
- @Test
public void testExactMatchHighPriority() throws Exception {
assertMatchEquals(lookup.lookup("two", true, 1), "two/1.0");
}
- @Test
public void testExactMatchLowPriority() throws Exception {
assertMatchEquals(lookup.lookup("one", true, 2),
"one/0.0",
"oneness/1.0");
}
- @Test
public void testMiss() throws Exception {
assertMatchEquals(lookup.lookup("xyz", true, 1));
}
- @Test
public void testAlphabeticWithWeights() throws Exception {
assertEquals(0, lookup.lookup("xyz", false, 1).size());
}
- @Test
public void testFullMatchList() throws Exception {
assertMatchEquals(lookup.lookup("one", true, Integer.MAX_VALUE),
"oneness/1.0",
@@ -82,7 +92,6 @@
"one/0.0");
}
- @Test
public void testMultilingualInput() throws Exception {
List<TermFreq> input = LookupBenchmarkTest.readTop50KWiki();
@@ -95,7 +104,6 @@
}
}
- @Test
public void testEmptyInput() throws Exception {
lookup = new FSTLookup();
lookup.build(new TermFreqArrayIterator(new TermFreq[0]));
@@ -103,9 +111,8 @@
assertMatchEquals(lookup.lookup("", true, 10));
}
- @Test
public void testRandom() throws Exception {
- List<TermFreq> freqs = Lists.newArrayList();
+ List<TermFreq> freqs = new ArrayList<TermFreq>();
Random rnd = random;
for (int i = 0; i < 5000; i++) {
freqs.add(new TermFreq("" + rnd.nextLong(), rnd.nextInt(100)));
@@ -118,7 +125,7 @@
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
for (LookupResult lr : lookup.lookup(prefix, true, 10)) {
- Assert.assertTrue(lr.key.startsWith(prefix));
+ assertTrue(lr.key.startsWith(prefix));
}
}
}
Index: modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java (revision 0)
+++ modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java (working copy)
@@ -1,9 +1,26 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
import java.util.Arrays;
import java.util.Iterator;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
* A {@link TermFreqIterator} over a sequence of {@link TermFreq}s.
Index: modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.util;
+package org.apache.lucene.search.spell;
import java.util.Iterator;
@@ -7,9 +7,9 @@
public float freq();
public static class TermFreqIteratorWrapper implements TermFreqIterator {
- private Iterator wrapped;
+ private Iterator<String> wrapped;
- public TermFreqIteratorWrapper(Iterator wrapped) {
+ public TermFreqIteratorWrapper(Iterator<String> wrapped) {
this.wrapped = wrapped;
}
Index: modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.util;
+package org.apache.lucene.search.spell;
import java.util.Iterator;
Index: modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (working copy)
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.solr.util;
+package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.Iterator;
@@ -49,7 +49,7 @@
this.thresh = thresh;
}
- public final Iterator getWordsIterator() {
+ public final Iterator<String> getWordsIterator() {
return new HighFrequencyIterator();
}
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java (working copy)
@@ -1,9 +1,9 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
import java.util.Collections;
-import org.apache.solr.util.SortedIterator;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.spell.SortedIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
* This wrapper buffers incoming elements and makes sure they are sorted in
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java (working copy)
@@ -1,8 +1,8 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
import java.util.Collections;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
* This wrapper buffers the incoming elements and makes sure they are in
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,7 +21,7 @@
import java.io.*;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java (working copy)
@@ -1,10 +1,10 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
import java.util.ArrayList;
import java.util.List;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
* This wrapper buffers incoming elements.
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest;
+package org.apache.lucene.search.suggest;
import java.io.File;
import java.io.IOException;
@@ -6,10 +6,8 @@
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
+import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.util.PriorityQueue;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.util.TermFreqIterator;
public abstract class Lookup {
/**
@@ -56,9 +54,6 @@
}
}
- /** Initialize the lookup. */
- public abstract void init(NamedList config, SolrCore core);
-
/** Build lookup from a dictionary. Some implementations may require sorted
* or unsorted keys from the dictionary's iterator - use
* {@link SortedTermFreqIteratorWrapper} or
@@ -75,7 +70,7 @@
build(tfit);
}
- protected abstract void build(TermFreqIterator tfit) throws IOException;
+ public abstract void build(TermFreqIterator tfit) throws IOException;
/**
* Persist the constructed lookup data to a directory. Optional operation.
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.jaspell;
+package org.apache.lucene.search.suggest.jaspell;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -9,28 +9,18 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.spelling.suggest.Lookup;
-import org.apache.solr.spelling.suggest.UnsortedTermFreqIteratorWrapper;
-import org.apache.solr.spelling.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
-import org.apache.solr.util.SortedIterator;
-import org.apache.solr.util.TermFreqIterator;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.lucene.search.spell.SortedIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
public class JaspellLookup extends Lookup {
- private static final Logger LOG = LoggerFactory.getLogger(JaspellLookup.class);
JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie();
private boolean usePrefix = true;
private int editDistance = 2;
@Override
- public void init(NamedList config, SolrCore core) {
- LOG.info("init: " + config);
- }
-
- @Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit instanceof SortedIterator) {
// make sure it's unsorted
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.jaspell;
+package org.apache.lucene.search.suggest.jaspell;
/**
* Copyright (c) 2005 Bruno Martins
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.tst;
+package org.apache.lucene.search.suggest.tst;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -9,22 +9,16 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.spelling.suggest.Lookup;
-import org.apache.solr.spelling.suggest.SortedTermFreqIteratorWrapper;
-import org.apache.solr.util.SortedIterator;
-import org.apache.solr.util.TermFreqIterator;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper;
+import org.apache.lucene.search.spell.SortedIterator;
+import org.apache.lucene.search.spell.TermFreqIterator;
public class TSTLookup extends Lookup {
TernaryTreeNode root = new TernaryTreeNode();
TSTAutocomplete autocomplete = new TSTAutocomplete();
@Override
- public void init(NamedList config, SolrCore core) {
- }
-
- @Override
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
// buffer first
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TernaryTreeNode.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.tst;
+package org.apache.lucene.search.suggest.tst;
/**
* The class creates a TST node.
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.tst;
+package org.apache.lucene.search.suggest.tst;
import java.util.*;
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/InputStreamDataInput.java (working copy)
@@ -1,10 +1,9 @@
-package org.apache.solr.spelling.suggest.fst;
+package org.apache.lucene.search.suggest.fst;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.store.DataInput;
-import com.google.common.io.ByteStreams;
/**
* A {@link DataInput} wrapping a plain {@link InputStream}.
@@ -26,6 +25,15 @@
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
- ByteStreams.readFully(is, b, offset, len);
+ // InputStream.read may legally return fewer bytes than requested, so keep
+ // reading until the whole range is filled; EOF before that is an error.
+ while (len > 0) {
+ final int cnt = is.read(b, offset, len);
+ if (cnt < 0) {
+ throw new EOFException();
+ }
+ offset += cnt;
+ len -= cnt;
+ }
}
}
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (revision 1126054)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (working copy)
@@ -1,4 +1,4 @@
-package org.apache.solr.spelling.suggest.fst;
+package org.apache.lucene.search.suggest.fst;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
@@ -13,20 +13,17 @@
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.automaton.fst.Builder;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.FST.Arc;
import org.apache.lucene.util.automaton.fst.NoOutputs;
import org.apache.lucene.util.automaton.fst.Outputs;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.spelling.suggest.Lookup;
-import org.apache.solr.spelling.suggest.tst.TSTLookup;
-import org.apache.solr.util.TermFreqIterator;
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.tst.TSTLookup;
+import org.apache.lucene.search.spell.TermFreqIterator;
/**
* Finite state automata based implementation of {@link Lookup} query
@@ -93,6 +90,16 @@
* nothing else.
*/
public class FSTLookup extends Lookup {
+
+ public FSTLookup() {
+ this(10, true);
+ }
+
+ public FSTLookup(int buckets, boolean exactMatchFirst) {
+ this.buckets = buckets;
+ this.exactMatchFirst = exactMatchFirst;
+ }
+
/** A structure for a single entry (for sorting/ preprocessing). */
private static class Entry {
char [] term;
@@ -104,6 +111,12 @@
}
}
+ /** Serialized automaton file name (storage). */
+ public static final String FILENAME = "fst.dat";
+
+ /** An empty result. */
+ private static final List<LookupResult> EMPTY_RESULT = Collections.emptyList();
+
/**
* The number of separate buckets for weights (discretization). The more buckets,
* the more fine-grained term weights (priorities) can be assigned. The speed of lookup
@@ -113,31 +126,15 @@
*
* <p>The number of buckets must be within [1, 255] range.
*/
- public static final String WEIGHT_BUCKETS = "weightBuckets";
+ private final int buckets;
/**
* If true, exact suggestions are returned first, even if they are prefixes
* of other strings in the automaton (possibly with larger weights).
*/
- public static final String EXACT_MATCH_FIRST = "exactMatchFirst";
+ private final boolean exactMatchFirst;
- /** Serialized automaton file name (storage). */
- public static final String FILENAME = "fst.dat";
-
- /** An empty result. */
- private static final List<LookupResult> EMPTY_RESULT = Lists.newArrayList();
-
/**
- * @see #WEIGHT_BUCKETS
- */
- private int buckets = 10;
-
- /**
- * #see #EXACT_MATCH_FIRST
- */
- private boolean exactMatchFirst = true;
-
- /**
* Finite state automaton encoding all the lookup terms. See class
* notes for details.
*/
@@ -151,23 +148,10 @@
/* */
@Override
- @SuppressWarnings("rawtypes")
- public void init(NamedList config, SolrCore core) {
- this.buckets = config.get(WEIGHT_BUCKETS) != null
- ? Integer.parseInt(config.get(WEIGHT_BUCKETS).toString())
- : 10;
-
- this.exactMatchFirst = config.get(EXACT_MATCH_FIRST) != null
- ? Boolean.valueOf(config.get(EXACT_MATCH_FIRST).toString())
- : true;
- }
-
- /* */
- @Override
public void build(TermFreqIterator tfit) throws IOException {
// Buffer the input because we will need it twice: for calculating
// weights distribution and for the actual automata building.
- List<Entry> entries = Lists.newArrayList();
+ List<Entry> entries = new ArrayList<Entry>();
while (tfit.hasNext()) {
String term = tfit.next();
char [] termChars = new char [term.length() + 1]; // add padding for weight.
@@ -200,7 +184,7 @@
@SuppressWarnings("unchecked")
private void cacheRootArcs() throws IOException {
if (automaton != null) {
- List<Arc<Object>> rootArcs = Lists.newArrayList();
+ List<Arc<Object>> rootArcs = new ArrayList<Arc<Object>>();
Arc