Index: solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
===================================================================
--- solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (revision 1304701)
+++ solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (working copy)
@@ -32,6 +32,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexReader;
@@ -69,6 +70,7 @@
public static final String ACCURACY = "accuracy";
public static final String STRING_DISTANCE = "distanceMeasure";
public static final String COMPARATOR_CLASS = "comparatorClass";
+ public static final String ANALYZER_FIELD_TYPE = "spellcheckerAnalyzerFieldType";
public static final String SCORE_COMP = "score";
public static final String FREQ_COMP = "freq";
@@ -121,12 +123,22 @@
} else {
sd = new LevensteinDistance();
}
+ String analyzerFieldType = (String) config.get(ANALYZER_FIELD_TYPE);
+ Analyzer analyzer = null;
+ if (analyzerFieldType != null &&
+ core.getSchema().getFieldTypes().containsKey(analyzerFieldType)) {
+ FieldType fieldType = core.getSchema().getFieldTypes().get(analyzerFieldType);
+ analyzer = fieldType.getQueryAnalyzer();
+ }
try {
initIndex();
spellChecker = new SpellChecker(index, sd, comp);
} catch (IOException e) {
throw new RuntimeException(e);
}
+ if (analyzer != null) {
+ spellChecker.setAnalyzer(analyzer);
+ }
if (accuracy != null) {
try {
this.accuracy = Float.parseFloat(accuracy);
Index: modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
===================================================================
--- modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (revision 1304701)
+++ modules/suggest/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
@@ -26,7 +27,12 @@
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
+import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
@@ -85,6 +91,13 @@
// twice
writer.addDocument(doc);
}
+ {
+ Document doc = new Document();
+ doc.add(newField("field1", "APPLE", TextField.TYPE_STORED)); // an "APPLE"
+ // in the
+ // index
+ writer.addDocument(doc);
+ }
writer.close();
searchers = Collections.synchronizedList(new ArrayList Example Usage:
*
- *
+ *
* SpellChecker spellchecker = new SpellChecker(spellIndexDirectory);
* // To index a field of a user index:
* spellchecker.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
@@ -66,7 +70,7 @@
* spellchecker.indexDictionary(new PlainTextDictionary(new File("myfile.txt")));
* String[] suggestions = spellchecker.suggestSimilar("misspelt", 5);
*
- *
+ *
I.e. if numSug == 1, don't count on that suggestion being the best one. * Thus, you should set this value to at least 5 for a good suggestion. * - * @param word the word you want a spell check done on + * @param surface the word you want a spell check done on * @param numSug the number of suggested words * @throws IOException if the underlying index throws an {@link IOException} * @throws AlreadyClosedException if the Spellchecker is already closed @@ -248,8 +283,8 @@ * * @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) */ - public String[] suggestSimilar(String word, int numSug) throws IOException { - return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); + public String[] suggestSimilar(String surface, int numSug) throws IOException { + return this.suggestSimilar(surface, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX); } /** @@ -263,7 +298,7 @@ *
I.e. if numSug == 1, don't count on that suggestion being the best one. * Thus, you should set this value to at least 5 for a good suggestion. * - * @param word the word you want a spell check done on + * @param surface the word you want a spell check done on * @param numSug the number of suggested words * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results * @throws IOException if the underlying index throws an {@link IOException} @@ -272,18 +307,18 @@ * * @see #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) */ - public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException { - return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); + public String[] suggestSimilar(String surface, int numSug, float accuracy) throws IOException { + return this.suggestSimilar(surface, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy); } /** * Calls {@link #suggestSimilar(String, int, IndexReader, String, SuggestMode, float) - * suggestSimilar(word, numSug, ir, suggestMode, field, this.accuracy)} + * suggestSimilar(surface, numSug, ir, field, suggestMode, this.accuracy)} * */ - public String[] suggestSimilar(String word, int numSug, IndexReader ir, + public String[] suggestSimilar(String surface, int numSug, IndexReader ir, String field, SuggestMode suggestMode) throws IOException { - return suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy); + return suggestSimilar(surface, numSug, ir, field, suggestMode, this.accuracy); } /** @@ -297,7 +332,7 @@ *
I.e. if numSug == 1, don't count on that suggestion being the best one. * Thus, you should set this value to at least 5 for a good suggestion. * - * @param word the word you want a spell check done on + * @param surface the word you want a spell check done on * @param numSug the number of suggested words * @param ir the indexReader of the user index (can be null see field param) * @param field the field of the user index: if field is not null, the suggested @@ -312,8 +347,12 @@ * of the suggest words in the field of the user index * */ - public String[] suggestSimilar(String word, int numSug, IndexReader ir, + public String[] suggestSimilar(String surface, int numSug, IndexReader ir, String field, SuggestMode suggestMode, float accuracy) throws IOException { + + // the analyzed form + String word = analyzeWord(surface); + // obtainSearcher calls ensureOpen final IndexSearcher indexSearcher = obtainSearcher(); try { @@ -327,11 +366,11 @@ final int lengthWord = word.length(); - final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0; + final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, surface)) : 0; final int goalFreq = suggestMode==SuggestMode.SUGGEST_MORE_POPULAR ? 
freq : 0; // if the word exists in the real index and we don't care for word frequency, return the word itself if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) { - return new String[] { word }; + return new String[] { surface }; } BooleanQuery query = new BooleanQuery(); @@ -376,12 +415,12 @@ sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word // don't suggest a word for itself, that would be silly - if (sugWord.string.equals(word)) { + if (sugWord.string.equals(surface)) { continue; } // edit distance - sugWord.score = sd.getDistance(word,sugWord.string); + sugWord.score = sd.getDistance(surface,sugWord.string); if (sugWord.score < accuracy) { continue; } @@ -516,7 +555,8 @@ terms: while ((currentTerm = iter.next()) != null) { - String word = currentTerm.utf8ToString(); + String surface = currentTerm.utf8ToString(); + String word = analyzeWord(surface); int len = word.length(); if (len < 3) { continue; // too short we bail but "too long" is fine... @@ -531,7 +571,7 @@ } // ok index the word - Document doc = createDocument(word, getMin(len), getMax(len)); + Document doc = createDocument(surface, word, getMin(len), getMax(len)); writer.addDocument(doc); } } finally { @@ -571,13 +611,13 @@ return 2; } - private static Document createDocument(String text, int ng1, int ng2) { + private static Document createDocument(String surface, String analyzed, int ng1, int ng2) { Document doc = new Document(); // the word field is never queried on... its indexed so it can be quickly // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos - Field f = new Field(F_WORD, text, StringField.TYPE_STORED); + Field f = new Field(F_WORD, surface, StringField.TYPE_STORED); doc.add(f); // orig term - addGram(text, doc, ng1, ng2); + addGram(analyzed, doc, ng1, ng2); return doc; }