diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 18406f5..1aeabdd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java index f43a0e3..c207584 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java @@ -654,7 +654,7 @@ public class DocTermOrds { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return termsEnum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 9bf15ee..6a1d6e9 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -174,7 +174,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return in.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java index 185e897..0995ac1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java @@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum extends TermsEnum { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return tenum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index 9957ec1..8b44e98 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -179,13 +179,6 @@ public abstract class TermsEnum implements BytesRefIterator { } }; } - - /** Return the {@link BytesRef} Comparator used to sort - * terms provided by the iterator. This may return - * null if there are no terms. Callers may invoke this - * method many times, so it's best to cache a single - * instance & reuse it. */ - public abstract Comparator getComparator() throws IOException; /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index a8e34e7..89240d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCache { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 2671efe..3edafab 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extends TermsEnum { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return actualEnum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java index 5809bb5..b22ec1a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; */ import java.io.IOException; +import java.util.Comparator; /** * A simple iterator interface for {@link BytesRef} iteration @@ -40,6 +41,14 @@ public interface BytesRefIterator { */ public BytesRef next() throws IOException; + /** + * Return the {@link BytesRef} Comparator used to sort terms provided by the + * iterator. This may return null if there are no items or the iterator is not + * sorted. Callers may invoke this method many times, so it's best to cache a + * single instance & reuse it. + */ + public Comparator getComparator(); + public final static class EmptyBytesRefIterator implements BytesRefIterator { @Override @@ -47,6 +56,10 @@ public interface BytesRefIterator { return null; } + public Comparator getComparator() { + return null; + } + } } diff --git a/lucene/core/src/java/org/apache/lucene/util/CharsRef.java b/lucene/core/src/java/org/apache/lucene/util/CharsRef.java index 726c3f9..8246f33 100644 --- a/lucene/core/src/java/org/apache/lucene/util/CharsRef.java +++ b/lucene/core/src/java/org/apache/lucene/util/CharsRef.java @@ -271,4 +271,20 @@ public final class CharsRef implements Comparable, CharSequence, Clone clone.copyChars(other); return clone; } + + public static CharsRef fromBytes(BytesRef ref, CharsRef chars) { + UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, chars); + return chars; + } + + public static CharsRef fromBytes(BytesRef ref) { + final CharsRef chars = new CharsRef(ref.length); + return fromBytes(ref, chars); + } + + public void set(char[] chars, int offset, int length) { + this.chars = chars; + this.offset = offset; + this.length = length; + } } \ No newline at end of file diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java index 073da44..4602e24 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java @@ -16,6 +16,7 @@ package org.apache.lucene.search.spell; * limitations under the License. */ +import java.io.IOException; import org.apache.lucene.util.BytesRefIterator; /** @@ -30,5 +31,5 @@ public interface Dictionary { * Return all words present in the dictionary * @return Iterator */ - BytesRefIterator getWordsIterator(); + BytesRefIterator getWordsIterator() throws IOException; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 6ac0230..8bd2c2b 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -19,16 +19,13 @@ package org.apache.lucene.search.spell; import java.io.IOException; import java.util.Comparator; -import java.util.Iterator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.BytesRefIterator; -import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.UnicodeUtil; /** * HighFrequencyDictionary: terms taken from the given field @@ -44,7 +41,6 @@ public class HighFrequencyDictionary implements Dictionary { private IndexReader reader; private String field; private float thresh; - private final CharsRef spare = new CharsRef(); public HighFrequencyDictionary(IndexReader reader, String field, float thresh) { this.reader = reader; @@ -52,63 +48,52 @@ public class HighFrequencyDictionary implements Dictionary { this.thresh = thresh; } - public final BytesRefIterator getWordsIterator() { + public final BytesRefIterator getWordsIterator() throws IOException { return new HighFrequencyIterator(); } - final class HighFrequencyIterator implements TermFreqIterator, SortedIterator { + final class HighFrequencyIterator implements TermFreqIterator { private final BytesRef spare = new BytesRef(); private final TermsEnum termsEnum; private int minNumDocs; + private long freq; - HighFrequencyIterator() { - try { - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - termsEnum = terms.iterator(null); - } else { - termsEnum = null; - } - minNumDocs = (int)(thresh * (float)reader.numDocs()); - } catch (IOException e) { - throw new RuntimeException(e); + HighFrequencyIterator() throws IOException { + Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + termsEnum = terms.iterator(null); + } else { + termsEnum = null; } + minNumDocs = (int)(thresh * (float)reader.numDocs()); } private boolean isFrequent(int freq) { return freq >= minNumDocs; } - public float freq() { - try { - return termsEnum.docFreq(); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } + public long weight() { + return freq; } - @Override public BytesRef next() throws IOException { if (termsEnum != null) { BytesRef next; - while ((next = termsEnum.next()) != null) { + while((next = termsEnum.next()) != null) { if (isFrequent(termsEnum.docFreq())) { + freq = termsEnum.docFreq(); spare.copyBytes(next); return spare; } - } + } } return null; } @Override - public Comparator comparator() { - try { - return termsEnum.getComparator(); - } catch (IOException e) { - throw new RuntimeException(e); - } + public Comparator getComparator() { + return termsEnum.getComparator(); } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java index bd4afcc..ee835e6 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java @@ -43,17 +43,12 @@ public class LuceneDictionary implements Dictionary { this.field = field; } - public final BytesRefIterator getWordsIterator() { - - try { - final Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - return terms.iterator(null); - } else { - return BytesRefIterator.EMPTY_ITERATOR; - } - } catch (IOException e) { - throw new RuntimeException(e); + public final BytesRefIterator getWordsIterator() throws IOException { + final Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + return terms.iterator(null); + } else { + return BytesRefIterator.EMPTY_ITERATOR; } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java index 39b1b0e..2be996f 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java @@ -18,7 +18,7 @@ package org.apache.lucene.search.spell; */ -import java.util.Iterator; +import java.util.Comparator; import java.io.*; import org.apache.lucene.util.BytesRef; @@ -53,7 +53,7 @@ public class PlainTextDictionary implements Dictionary { in = new BufferedReader(reader); } - public BytesRefIterator getWordsIterator() { + public BytesRefIterator getWordsIterator() throws IOException { return new FileIterator(); } @@ -85,6 +85,11 @@ public class PlainTextDictionary implements Dictionary { } return result; } + + @Override + public Comparator getComparator() { + return null; + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java deleted file mode 100644 index 694e0ca..0000000 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.apache.lucene.search.spell; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Iterator; - -import org.apache.lucene.util.BytesRef; - -/** - * Marker interface to signal that elements coming from {@link Iterator} - * come in ascending lexicographic order. - */ -public interface SortedIterator { - - public Comparator comparator(); - -} diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java b/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java index 4a6d431..64df768 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java @@ -18,12 +18,14 @@ package org.apache.lucene.search.spell; */ import java.io.IOException; +import java.util.Comparator; + import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; public interface TermFreqIterator extends BytesRefIterator { - public float freq(); + public long weight(); public static class TermFreqIteratorWrapper implements TermFreqIterator { private BytesRefIterator wrapped; @@ -32,12 +34,17 @@ public interface TermFreqIterator extends BytesRefIterator { this.wrapped = wrapped; } - public float freq() { - return 1.0f; + public long weight() { + return 1; } public BytesRef next() throws IOException { return wrapped.next(); } + + @Override + public Comparator getComparator() { + return wrapped.getComparator(); + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java index a98f4a5..12c89e0 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java @@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest; */ import java.io.IOException; - +import java.util.Comparator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef; * This wrapper buffers incoming elements. */ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { - + // TODO keep this for now protected BytesRefList entries = new BytesRefList(); protected int curPos = -1; - protected float[] freqs = new float[1]; + protected long[] freqs = new long[1]; private final BytesRef spare = new BytesRef(); + private final Comparator comp; public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + this.comp = source.getComparator(); BytesRef spare; int freqIndex = 0; while((spare = source.next()) != null) { @@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { if (freqIndex >= freqs.length) { freqs = ArrayUtil.grow(freqs, freqs.length+1); } - freqs[freqIndex++] = source.freq(); + freqs[freqIndex++] = source.weight(); } } - public float freq() { + public long weight() { return freqs[curPos]; } @@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { return null; } + @Override + public Comparator getComparator() { + return comp; + } + } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java index bbd0bfa..8925cca 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Comparator; + import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; @@ -83,6 +84,11 @@ final class BytesRefList { } return null; } + + @Override + public Comparator getComparator() { + return null; + } }; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index 15e833f..059e1c2 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.*; +import java.util.Comparator; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.TermFreqIterator; @@ -56,11 +57,11 @@ public class FileDictionary implements Dictionary { } final class FileIterator implements TermFreqIterator { - private float curFreq; + private long curFreq; private final BytesRef spare = new BytesRef(); - public float freq() { + public long weight() { return curFreq; } @@ -73,7 +74,8 @@ public class FileDictionary implements Dictionary { if (line != null) { String[] fields = line.split("\t"); if (fields.length > 1) { - curFreq = Float.parseFloat(fields[1]); + // keep reading floats for bw compat + curFreq = (int)Float.parseFloat(fields[1]); spare.copyChars(fields[0]); } else { spare.copyChars(line); @@ -86,6 +88,11 @@ public class FileDictionary implements Dictionary { return null; } } + + @Override + public Comparator getComparator() { + return null; + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index 1ab0eac..154d05b 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; @@ -28,15 +29,15 @@ import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.PriorityQueue; -public abstract class Lookup { +public abstract class Lookup { /** * Result of a lookup. */ public static final class LookupResult implements Comparable { - public final String key; + public final CharSequence key; public final float value; - public LookupResult(String key, float value) { + public LookupResult(CharSequence key, float value) { this.key = key; this.value = value; } @@ -48,10 +49,37 @@ public abstract class Lookup { /** Compare alphabetically. */ public int compareTo(LookupResult o) { - return this.key.compareTo(o.key); + return CHARSEQUENCE_COMPARATOR.compare(key, o.key); } } + public static final Comparator CHARSEQUENCE_COMPARATOR = new CharSequenceComparator(); + + private static class CharSequenceComparator implements Comparator { + + @Override + public int compare(CharSequence o1, CharSequence o2) { + final int l1 = o1.length(); + final int l2 = o2.length(); + + final int aStop; + if (l1 < l2) { + aStop = l1; + } else { + aStop = l2; + } + for (int i = 0; i < aStop; i++) { + int diff = o1.charAt(i) - o2.charAt(i); + if (diff != 0) { + return diff; + } + } + // One is a prefix of the other, or, they are equal: + return l1 - l2; + } + + } + public static final class LookupPriorityQueue extends PriorityQueue { public LookupPriorityQueue(int size) { @@ -99,8 +127,7 @@ public abstract class Lookup { * @param num maximum number of results to return * @return a list of possible completions, with their relative weight (e.g. popularity) */ - // TODO: this should be a BytesRef API? - public abstract List lookup(String key, boolean onlyMorePopular, int num); + public abstract List lookup(CharSequence key, boolean onlyMorePopular, int num); /** * Modify the lookup data by recording additional data. Optional operation. @@ -109,16 +136,14 @@ public abstract class Lookup { * @return true if new key is added, false if it already exists or operation * is not supported. */ - // TODO: this should be a BytesRef API? - public abstract boolean add(String key, Object value); + public abstract boolean add(CharSequence key, V value); /** * Get value associated with a specific key. * @param key lookup key * @return associated value */ - // TODO: this should be a BytesRef API? - public abstract Object get(String key); + public abstract V get(CharSequence key); /** * Persist the constructed lookup data to a directory. Optional operation. diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java index ffa4f9b..2380724 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java @@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Comparator; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRef; @@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef; * This wrapper buffers incoming elements and makes sure they are sorted in * ascending lexicographic order. */ -public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator { - +public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { + // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter... private final int[] sortedOrds; private int currentOrd = -1; private final BytesRef spare = new BytesRef(); private final Comparator comp; - public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comp) throws IOException { super(source); @@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap } @Override - public float freq() { + public long weight() { return freqs[currentOrd]; } @@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap } @Override - public Comparator comparator() { + public Comparator getComparator() { return comp; } - } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java index d7b1b60..4a7e3d8 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef; * random order. */ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { - + // TODO keep this for now private final int[] ords; private int currentOrd = -1; private final BytesRef spare = new BytesRef(); @@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr } @Override - public float freq() { + public long weight() { return freqs[currentOrd]; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java index 23a8df2..59fdc4c 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java @@ -199,7 +199,7 @@ public class FSTCompletion { * @return Returns the suggestions, sorted by their approximated weight first * (decreasing) and then alphabetically (UTF-8 codepoint order). */ - public List lookup(String key, int num) { + public List lookup(CharSequence key, int num) { if (key.length() == 0 || automaton == null) { return EMPTY_RESULT; } @@ -388,7 +388,7 @@ public class FSTCompletion { * Returns the bucket assigned to a given key (if found) or null if * no exact match exists. */ - public Integer getBucket(String key) { + public Integer getBucket(CharSequence key) { return getExactMatchStartingFromRootArc(0, new BytesRef(key)); } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 4de0d00..f28b849 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutputs; * An adapter from {@link Lookup} API to {@link FSTCompletion}. * *

This adapter differs from {@link FSTCompletion} in that it attempts - * to discretize any "weights" as passed from in {@link TermFreqIterator#freq()} + * to discretize any "weights" as passed from in {@link TermFreqIterator#weight()} * to match the number of buckets. For the rationale for bucketing, see * {@link FSTCompletion}. * @@ -60,7 +60,7 @@ import org.apache.lucene.util.fst.NoOutputs; * * @see FSTCompletion */ -public class FSTCompletionLookup extends Lookup { +public class FSTCompletionLookup extends Lookup { /** * An invalid bucket count if we're creating an object * of this class from an existing FST. @@ -171,7 +171,7 @@ public class FSTCompletionLookup extends Lookup { } output.reset(buffer); - output.writeInt(FloatMagic.toSortable(tfit.freq())); + output.writeInt(FloatMagic.toSortable(tfit.weight())); output.writeBytes(spare.bytes, spare.offset, spare.length); writer.write(buffer, 0, output.getPosition()); } @@ -232,7 +232,7 @@ public class FSTCompletionLookup extends Lookup { } @Override - public List lookup(String key, boolean higherWeightsFirst, int num) { + public List lookup(CharSequence key, boolean higherWeightsFirst, int num) { final List completions; if (higherWeightsFirst) { completions = higherWeightsCompletion.lookup(key, num); @@ -242,19 +242,19 @@ public class FSTCompletionLookup extends Lookup { final ArrayList results = new ArrayList(completions.size()); for (Completion c : completions) { - results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket)); + results.add(new LookupResult(CharsRef.fromBytes(c.utf8), c.bucket)); } return results; } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Float value) { // Not supported. return false; } @Override - public Float get(String key) { + public Float get(CharSequence key) { Integer bucket = normalCompletion.getBucket(key); if (bucket == null) return null; diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index 882b133..7da1198 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -33,6 +33,7 @@ import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.fst.Builder; @@ -56,7 +57,7 @@ import org.apache.lucene.util.fst.Util.MinResult; * @see Util#shortestPaths(FST, FST.Arc, int) * @lucene.experimental */ -public class WFSTCompletionLookup extends Lookup { +public class WFSTCompletionLookup extends Lookup { /** * File name for the automaton. @@ -121,7 +122,7 @@ public class WFSTCompletionLookup extends Lookup { output.reset(buffer); output.writeBytes(spare.bytes, spare.offset, spare.length); output.writeByte((byte)0); // separator: not used, just for sort order - output.writeInt((int)encodeWeight(iterator.freq())); + output.writeInt((int)encodeWeight(iterator.weight())); writer.write(buffer, 0, output.getPosition()); } writer.close(); @@ -200,7 +201,7 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { assert num > 0; BytesRef scratch = new BytesRef(key); int prefixLength = scratch.length; @@ -218,7 +219,7 @@ public class WFSTCompletionLookup extends Lookup { List results = new ArrayList(num); if (exactFirst && arc.isFinal()) { - results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput))); + results.add(new LookupResult(CharsRef.fromBytes(scratch), decodeWeight(prefixOutput + arc.nextFinalOutput))); if (--num == 0) { return results; // that was quick } @@ -237,7 +238,7 @@ public class WFSTCompletionLookup extends Lookup { Util.toBytesRef(completion.input, suffix); scratch.append(suffix); - results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output))); + results.add(new LookupResult(CharsRef.fromBytes(scratch), decodeWeight(prefixOutput + completion.output))); } return results; } @@ -264,7 +265,7 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Float value) { return false; // Not supported. } @@ -273,7 +274,7 @@ public class WFSTCompletionLookup extends Lookup { * or null if it does not exist. */ @Override - public Float get(String key) { + public Float get(CharSequence key) { Arc arc = new Arc(); Long result = null; try { diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 82c2883..ddcd208 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -28,7 +28,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; @@ -38,14 +37,14 @@ import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.UnicodeUtil; -public class JaspellLookup extends Lookup { +public class JaspellLookup extends Lookup { JaspellTernarySearchTrie trie = new JaspellTernarySearchTrie(); private boolean usePrefix = true; private int editDistance = 2; @Override public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof SortedIterator) { + if (tfit.getComparator() != null) { // make sure it's unsorted // WTF - this could result in yet another sorted iteration.... tfit = new UnsortedTermFreqIteratorWrapper(tfit); @@ -56,7 +55,7 @@ public class JaspellLookup extends Lookup { final CharsRef charsSpare = new CharsRef(); while ((spare = tfit.next()) != null) { - float freq = tfit.freq(); + float freq = tfit.weight(); if (spare.length == 0) { continue; } @@ -67,19 +66,19 @@ public class JaspellLookup extends Lookup { } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { trie.put(key, value); // XXX return false; } @Override - public Object get(String key) { + public Object get(CharSequence key) { return trie.get(key); } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { List res = new ArrayList(); List list; int count = onlyMorePopular ? num * 2 : num; @@ -97,7 +96,7 @@ public class JaspellLookup extends Lookup { LookupPriorityQueue queue = new LookupPriorityQueue(num); for (String s : list) { float freq = (Float)trie.get(s); - queue.insertWithOverflow(new LookupResult(s, freq)); + queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq)); } for (LookupResult lr : queue.getResults()) { res.add(lr); @@ -106,7 +105,7 @@ public class JaspellLookup extends Lookup { for (int i = 0; i < maxCnt; i++) { String s = list.get(i); float freq = (Float)trie.get(s); - res.add(new LookupResult(s, freq)); + res.add(new LookupResult(new CharsRef(s), freq)); } } return res; diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java index 3402575..8c82c2a 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java @@ -38,6 +38,8 @@ import java.util.List; import java.util.Vector; import java.util.zip.GZIPInputStream; +import org.apache.lucene.util.CharsRef; + /** * Implementation of a Ternary Search Trie, a data structure for storing * String objects that combines the compact size of a binary search @@ -368,8 +370,8 @@ public class JaspellTernarySearchTrie { * A String index. *@return The object retrieved from the Ternary Search Trie. */ - public Object get(String key) { - TSTNode node = getNode(key.trim().toLowerCase()); + public Object get(CharSequence key) { + TSTNode node = getNode(key); if (node == null) { return null; } @@ -435,7 +437,7 @@ public class JaspellTernarySearchTrie { *@return The node object indexed by key. This object is an instance of an * inner class named TernarySearchTrie.TSTNode. */ - public TSTNode getNode(String key) { + public TSTNode getNode(CharSequence key) { return getNode(key, rootNode); } @@ -450,8 +452,7 @@ public class JaspellTernarySearchTrie { *@return The node object indexed by key. This object is an instance of an * inner class named TernarySearchTrie.TSTNode. */ - protected TSTNode getNode(String key2, TSTNode startNode) { - String key = key2.trim().toLowerCase(); + protected TSTNode getNode(CharSequence key, TSTNode startNode) { if (key == null || startNode == null || key.length() == 0) { return null; } @@ -490,7 +491,7 @@ public class JaspellTernarySearchTrie { *@exception IllegalArgumentException * If the key is an empty String. */ - protected TSTNode getOrCreateNode(String key) throws NullPointerException, + protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException, IllegalArgumentException { if (key == null) { throw new NullPointerException( @@ -568,7 +569,7 @@ public class JaspellTernarySearchTrie { * The maximum number of values returned by this method. *@return A List with the results */ - public List matchAlmost(String key, int numReturnValues) { + public List matchAlmost(CharSequence key, int numReturnValues) { return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key, ((numReturnValues < 0) ? -1 : numReturnValues), new Vector(), false); } @@ -598,7 +599,7 @@ public class JaspellTernarySearchTrie { *@return A List with the results. */ private List matchAlmostRecursion(TSTNode currentNode, int charIndex, - int d, String matchAlmostKey, int matchAlmostNumReturnValues, + int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues, List matchAlmostResult2, boolean upTo) { if ((currentNode == null) || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues) @@ -658,7 +659,7 @@ public class JaspellTernarySearchTrie { * The maximum number of values returned from this method. *@return A List with the results */ - public List matchPrefix(String prefix, int numReturnValues) { + public List matchPrefix(CharSequence prefix, int numReturnValues) { Vector sortKeysResult = new Vector(); TSTNode startNode = getNode(prefix); if (startNode == null) { @@ -722,8 +723,8 @@ public class JaspellTernarySearchTrie { *@param value * The object to be stored in the Trie. */ - public void put(String key, Object value) { - getOrCreateNode(key.trim().toLowerCase()).data = value; + public void put(CharSequence key, Object value) { + getOrCreateNode(key).data = value; } /** diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java index ce44090..4c6da40 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java @@ -57,7 +57,7 @@ public class TSTAutocomplete { * index of character in key to be inserted currently. * @return currentNode The new reference to root node of TST */ - public TernaryTreeNode insert(TernaryTreeNode currentNode, String s, + public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s, Object val, int x) { if (s == null || s.length() <= x) { return currentNode; @@ -69,7 +69,7 @@ public class TSTAutocomplete { if (x < s.length() - 1) { currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); } else { - currentNode.token = s; + currentNode.token = s.toString(); currentNode.val = val; return currentNode; } @@ -79,7 +79,7 @@ public class TSTAutocomplete { if (x < s.length() - 1) { currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); } else { - currentNode.token = s; + currentNode.token = s.toString(); currentNode.val = val; return currentNode; } @@ -104,7 +104,7 @@ public class TSTAutocomplete { * @return suggest list of auto-completed keys for the given prefix query. */ public ArrayList prefixCompletion(TernaryTreeNode root, - String s, int x) { + CharSequence s, int x) { TernaryTreeNode p = root; ArrayList suggest = new ArrayList(); diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 01fe8b6..97de96e 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -30,14 +30,13 @@ import java.util.List; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.UnicodeUtil; -public class TSTLookup extends Lookup { +public class TSTLookup extends Lookup { TernaryTreeNode root = new TernaryTreeNode(); TSTAutocomplete autocomplete = new TSTAutocomplete(); @@ -45,7 +44,7 @@ public class TSTLookup extends Lookup { public void build(TermFreqIterator tfit) throws IOException { root = new TernaryTreeNode(); // buffer first - if ((!(tfit instanceof SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { + if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { // make sure it's sorted and the comparator uses UTF16 sort order tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); } @@ -58,34 +57,47 @@ public class TSTLookup extends Lookup { charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); tokens.add(charsSpare.toString()); - vals.add(new Float(tfit.freq())); + vals.add(new Float(tfit.weight())); } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { autocomplete.insert(root, key, value, 0); // XXX we don't know if a new node was created return true; } @Override - public Object get(String key) { + public Object get(CharSequence key) { List list = autocomplete.prefixCompletion(root, key, 0); if (list == null || list.isEmpty()) { return null; } for (TernaryTreeNode n : list) { - if (n.token.equals(key)) { + if (charSeqEquals(n.token, key)) { return n.val; } } return null; } + + private static boolean charSeqEquals(CharSequence left, CharSequence right) { + int len = left.length(); + if (len != right.length()) { + return false; + } + for (int i = 0; i < len; i++) { + if (left.charAt(i) != right.charAt(i)) { + return false; + } + } + return true; + } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { List list = autocomplete.prefixCompletion(root, key, 0); List res = new ArrayList(); if (list == null || list.size() == 0) { diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index 0d56526..3dcd2d8 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -45,7 +45,7 @@ import org.junit.Ignore; @Ignore("COMMENT ME TO RUN BENCHMARKS!") public class LookupBenchmarkTest extends LuceneTestCase { @SuppressWarnings("unchecked") - private final List> benchmarkClasses = Arrays.asList( + private final List>> benchmarkClasses = Arrays.asList( JaspellLookup.class, TSTLookup.class, FSTCompletionLookup.class, @@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { while ((line = br.readLine()) != null) { int tab = line.indexOf('|'); assertTrue("No | separator?: " + line, tab >= 0); - float weight = Float.parseFloat(line.substring(tab + 1)); + int weight = Integer.parseInt(line.substring(tab + 1)); String key = line.substring(0, tab); input.add(new TermFreq(key, weight)); } @@ -199,7 +199,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { public Integer call() throws Exception { int v = 0; for (String term : input) { - v += lookup.lookup(term, onlyMorePopular, num).size(); + v += lookup.lookup(new CharsRef(term), onlyMorePopular, num).size(); } return v; } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java index c4ab93e..d015af9 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java @@ -22,6 +22,7 @@ import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.fst.FSTCompletionLookup; import org.apache.lucene.search.suggest.jaspell.JaspellLookup; import org.apache.lucene.search.suggest.tst.TSTLookup; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.LuceneTestCase; public class PersistenceTest extends LuceneTestCase { @@ -58,10 +59,10 @@ public class PersistenceTest extends LuceneTestCase { boolean supportsExactWeights) throws Exception { // Add all input keys. - Lookup lookup = lookupClass.newInstance(); + Lookup lookup = lookupClass.newInstance(); TermFreq[] keys = new TermFreq[this.keys.length]; for (int i = 0; i < keys.length; i++) - keys[i] = new TermFreq(this.keys[i], (float) i); + keys[i] = new TermFreq(this.keys[i], i); lookup.build(new TermFreqArrayIterator(keys)); // Store the suggester. @@ -74,8 +75,10 @@ public class PersistenceTest extends LuceneTestCase { // Assert validity. float previous = Float.NEGATIVE_INFINITY; + CharsRef scratch = new CharsRef(); for (TermFreq k : keys) { - Float val = (Float) lookup.get(k.term.utf8ToString()); + scratch.grow(k.term.length); + Float val = (Float) lookup.get(CharsRef.fromBytes(k.term, scratch)); assertNotNull(k.term.utf8ToString(), val); if (supportsExactWeights) { diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java index 29d0433..49b346b 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java @@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef; public final class TermFreq { public final BytesRef term; - public final float v; + public final long v; - public TermFreq(String term, float v) { + public TermFreq(String term, long v) { this(new BytesRef(term), v); } - public TermFreq(BytesRef term, float v) { + public TermFreq(BytesRef term, long v) { this.term = term; this.v = v; } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java index 27cbb66..1abf941 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Arrays; +import java.util.Comparator; import java.util.Iterator; import org.apache.lucene.search.spell.TermFreqIterator; @@ -44,7 +45,7 @@ public final class TermFreqArrayIterator implements TermFreqIterator { this(i.iterator()); } - public float freq() { + public long weight() { return current.v; } @@ -57,4 +58,9 @@ public final class TermFreqArrayIterator implements TermFreqIterator { } return null; } + + @Override + public Comparator getComparator() { + return null; + } } \ No newline at end of file diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java index 13a4a4e..8195281 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; + +import org.apache.lucene.search.suggest.BytesRefList; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.LuceneTestCase; diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java index f915afc..6e74bc2 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java @@ -38,7 +38,7 @@ public class TestTermFreqIterator extends LuceneTestCase { public void testTerms() throws Exception { int num = atLeast(10000); - TreeMap sorted = new TreeMap(); + TreeMap sorted = new TreeMap(); TermFreq[] unsorted = new TermFreq[num]; for (int i = 0; i < num; i++) { @@ -46,28 +46,28 @@ public class TestTermFreqIterator extends LuceneTestCase { do { key = new BytesRef(_TestUtil.randomUnicodeString(random)); } while (sorted.containsKey(key)); - float value = random.nextFloat(); + long value = random.nextLong(); sorted.put(key, value); unsorted[i] = new TermFreq(key, value); } // test the sorted iterator wrapper TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator()); - Iterator> expected = sorted.entrySet().iterator(); + Iterator> expected = sorted.entrySet().iterator(); while (expected.hasNext()) { - Map.Entry entry = expected.next(); + Map.Entry entry = expected.next(); assertEquals(entry.getKey(), wrapper.next()); - assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F); + assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F); } assertNull(wrapper.next()); // test the unsorted iterator wrapper wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted)); - TreeMap actual = new TreeMap(); + TreeMap actual = new TreeMap(); BytesRef key; while ((key = wrapper.next()) != null) { - float value = wrapper.freq(); + long value = wrapper.weight(); actual.put(BytesRef.deepCopyOf(key), value); } assertEquals(sorted, actual); diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index 354b843..924abc4 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.*; * Unit tests for {@link FSTCompletion}. */ public class FSTCompletionTest extends LuceneTestCase { - public static TermFreq tf(String t, float v) { + public static TermFreq tf(String t, int v) { return new TermFreq(t, v); } @@ -62,9 +62,9 @@ public class FSTCompletionTest extends LuceneTestCase { tf("foundation", 1), tf("fourblah", 1), tf("fourteen", 1), - tf("four", 0f), - tf("fourier", 0f), - tf("fourty", 0f), + tf("four", 0), + tf("fourier", 0), + tf("fourty", 0), tf("xo", 1), }; return keys; @@ -158,7 +158,7 @@ public class FSTCompletionTest extends LuceneTestCase { Random r = random; List keys = new ArrayList(); for (int i = 0; i < 5000; i++) { - keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f)); + keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1)); } lookup.build(new TermFreqArrayIterator(keys)); @@ -166,8 +166,10 @@ public class FSTCompletionTest extends LuceneTestCase { // All the weights were constant, so all returned buckets must be constant, whatever they // are. Float previous = null; + CharsRef scratch = new CharsRef(); for (TermFreq tf : keys) { - Float current = lookup.get(tf.term.utf8ToString()); + scratch.grow(tf.term.length); + Float current = lookup.get(CharsRef.fromBytes(tf.term, scratch)); if (previous != null) { assertEquals(previous, current); } @@ -181,10 +183,11 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(new TermFreqArrayIterator(input)); - + CharsRef scratch = new CharsRef(); for (TermFreq tf : input) { - assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null); - assertEquals(tf.term, lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key); + scratch.grow(tf.term.length); + assertTrue("Not found: " + tf.term, lookup.get(CharsRef.fromBytes(tf.term, scratch)) != null); + assertEquals(tf.term, lookup.lookup(CharsRef.fromBytes(tf.term, scratch), true, 1).get(0).key); } List result = lookup.lookup("wit", true, 5); @@ -203,7 +206,7 @@ public class FSTCompletionTest extends LuceneTestCase { List freqs = new ArrayList(); Random rnd = random; for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { - float weight = rnd.nextFloat() * 100; + int weight = random.nextInt(100); freqs.add(new TermFreq("" + rnd.nextLong(), weight)); } @@ -215,7 +218,7 @@ public class FSTCompletionTest extends LuceneTestCase { for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); for (LookupResult lr : lookup.lookup(prefix, true, 10)) { - assertTrue(lr.key.startsWith(prefix)); + assertTrue(lr.key.toString().startsWith(prefix)); } } } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java index d0537ee..42c7ee9 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -47,31 +47,31 @@ public class WFSTCompletionTest extends LuceneTestCase { // top N of 2, but only foo is available List results = suggester.lookup("f", false, 2); assertEquals(1, results.size()); - assertEquals("foo", results.get(0).key); + assertEquals("foo", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // top N of 1 for 'bar': we return this even though barbar is higher results = suggester.lookup("bar", false, 1); assertEquals(1, results.size()); - assertEquals("bar", results.get(0).key); + assertEquals("bar", results.get(0).key.toString()); assertEquals(10, results.get(0).value, 0.01F); // top N Of 2 for 'b' results = suggester.lookup("b", false, 2); assertEquals(2, results.size()); - assertEquals("barbar", results.get(0).key); + assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); - assertEquals("bar", results.get(1).key); + assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); // top N of 3 for 'ba' results = suggester.lookup("ba", false, 3); assertEquals(3, results.size()); - assertEquals("barbar", results.get(0).key); + assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); - assertEquals("bar", results.get(1).key); + assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); - assertEquals("barbara", results.get(2).key); + assertEquals("barbara", results.get(2).key.toString()); assertEquals(6, results.get(2).value, 0.01F); } @@ -100,7 +100,7 @@ public class WFSTCompletionTest extends LuceneTestCase { // we can probably do Integer.MAX_VALUE here, but why worry. int weight = random.nextInt(1<<24); slowCompletor.put(s, (long)weight); - keys[i] = new TermFreq(s, (float) weight); + keys[i] = new TermFreq(s, weight); } WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); @@ -126,7 +126,7 @@ public class WFSTCompletionTest extends LuceneTestCase { public int compare(LookupResult left, LookupResult right) { int cmp = Float.compare(right.value, left.value); if (cmp == 0) { - return left.key.compareTo(right.key); + return left.compareTo(right); } else { return cmp; } @@ -140,7 +140,7 @@ public class WFSTCompletionTest extends LuceneTestCase { for(int hit=0;hit lookup; protected String lookupImpl; protected SolrCore core; @@ -152,7 +153,7 @@ public class Suggester extends SolrSpellChecker { build(core, searcher); } - public void add(String query, int numHits) { + public void add(CharsRef query, int numHits) { LOG.info("add " + query + ", " + numHits); lookup.add(query, new Integer(numHits)); } @@ -167,9 +168,10 @@ public class Suggester extends SolrSpellChecker { return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); + CharsRef scratch = new CharsRef(); for (Token t : options.tokens) { - String term = new String(t.buffer(), 0, t.length()); - List suggestions = lookup.lookup(term, + scratch.set(t.buffer(), 0, t.length()); + List suggestions = lookup.lookup(scratch, options.onlyMorePopular, options.count); if (suggestions == null) { continue; @@ -178,7 +180,7 @@ public class Suggester extends SolrSpellChecker { Collections.sort(suggestions); } for (LookupResult lr : suggestions) { - res.add(t, lr.key, ((Number)lr.value).intValue()); + res.add(t, lr.key.toString(), ((Number)lr.value).intValue()); } } return res;