diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 18406f5..1aeabdd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -387,7 +387,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java index f43a0e3..c207584 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java @@ -654,7 +654,7 @@ public class DocTermOrds { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return termsEnum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 80b1e8c..8b3a9dc 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -174,7 +174,7 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return in.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java index 185e897..0995ac1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java +++ 
b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java @@ -122,7 +122,7 @@ public abstract class FilteredTermsEnum extends TermsEnum { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return tenum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index 9957ec1..8b44e98 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -179,13 +179,6 @@ public abstract class TermsEnum implements BytesRefIterator { } }; } - - /** Return the {@link BytesRef} Comparator used to sort - * terms provided by the iterator. This may return - * null if there are no terms. Callers may invoke this - * method many times, so it's best to cache a single - * instance & reuse it. */ - public abstract Comparator getComparator() throws IOException; /** An empty TermsEnum for quickly returning an empty instance e.g. 
* in {@link org.apache.lucene.search.MultiTermQuery} diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index a8e34e7..89240d8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -1052,7 +1052,7 @@ class FieldCacheImpl implements FieldCache { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 2671efe..3edafab 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -287,7 +287,7 @@ public final class FuzzyTermsEnum extends TermsEnum { } @Override - public Comparator getComparator() throws IOException { + public Comparator getComparator() { return actualEnum.getComparator(); } diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java index 5809bb5..b22ec1a 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java +++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java @@ -18,6 +18,7 @@ package org.apache.lucene.util; */ import java.io.IOException; +import java.util.Comparator; /** * A simple iterator interface for {@link BytesRef} iteration @@ -40,6 +41,14 @@ public interface BytesRefIterator { */ public BytesRef next() throws IOException; + /** + * Return the {@link BytesRef} Comparator used to sort terms provided by the + * iterator. This may return null if there are no items or the iterator is not + * sorted. 
Callers may invoke this method many times, so it's best to cache a + * single instance & reuse it. + */ + public Comparator getComparator(); + public final static class EmptyBytesRefIterator implements BytesRefIterator { @Override @@ -47,6 +56,10 @@ public interface BytesRefIterator { return null; } + public Comparator getComparator() { + return null; + } + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index c6dbdd6..0ef2609 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -26,6 +26,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.lang.reflect.Method; +import java.nio.CharBuffer; import java.util.Enumeration; import java.util.HashMap; import java.util.Map; @@ -707,4 +708,23 @@ public class _TestUtil { } return termsEnum.docs(liveDocs, null, needsFreqs); } + + public static CharSequence stringToCharSequence(String string, Random random) { + return bytesToCharSequence(new BytesRef(string), random); + } + + public static CharSequence bytesToCharSequence(BytesRef ref, Random random) { + switch(random.nextInt(5)) { + case 4: + CharsRef chars = new CharsRef(ref.length); + UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, chars); + return chars; + case 3: + return CharBuffer.wrap(ref.utf8ToString()); + default: + return ref.utf8ToString(); + } + + } + } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java index 073da44..4602e24 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/Dictionary.java @@ -16,6 +16,7 @@ package org.apache.lucene.search.spell; * limitations under the License. 
*/ +import java.io.IOException; import org.apache.lucene.util.BytesRefIterator; /** @@ -30,5 +31,5 @@ public interface Dictionary { * Return all words present in the dictionary * @return Iterator */ - BytesRefIterator getWordsIterator(); + BytesRefIterator getWordsIterator() throws IOException; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 33b4119..3f4833e 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -19,16 +19,13 @@ package org.apache.lucene.search.spell; import java.io.IOException; import java.util.Comparator; -import java.util.Iterator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.BytesRefIterator; -import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.UnicodeUtil; /** * HighFrequencyDictionary: terms taken from the given field @@ -44,7 +41,6 @@ public class HighFrequencyDictionary implements Dictionary { private IndexReader reader; private String field; private float thresh; - private final CharsRef spare = new CharsRef(); public HighFrequencyDictionary(IndexReader reader, String field, float thresh) { this.reader = reader; @@ -52,66 +48,55 @@ public class HighFrequencyDictionary implements Dictionary { this.thresh = thresh; } - public final BytesRefIterator getWordsIterator() { + public final BytesRefIterator getWordsIterator() throws IOException { return new HighFrequencyIterator(); } - final class HighFrequencyIterator implements TermFreqIterator, SortedIterator { + final class HighFrequencyIterator implements TermFreqIterator { private final BytesRef spare = new 
BytesRef(); private final TermsEnum termsEnum; private int minNumDocs; + private long freq; - HighFrequencyIterator() { - try { - Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - termsEnum = terms.iterator(null); - } else { - termsEnum = null; - } - minNumDocs = (int)(thresh * (float)reader.numDocs()); - } catch (IOException e) { - throw new RuntimeException(e); + HighFrequencyIterator() throws IOException { + Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + termsEnum = terms.iterator(null); + } else { + termsEnum = null; } + minNumDocs = (int)(thresh * (float)reader.numDocs()); } private boolean isFrequent(int freq) { return freq >= minNumDocs; } - public float freq() { - try { - return termsEnum.docFreq(); - } catch (IOException ioe) { - throw new RuntimeException(ioe); - } + public long weight() { + return freq; } - @Override public BytesRef next() throws IOException { if (termsEnum != null) { BytesRef next; - while ((next = termsEnum.next()) != null) { + while((next = termsEnum.next()) != null) { if (isFrequent(termsEnum.docFreq())) { + freq = termsEnum.docFreq(); spare.copyBytes(next); return spare; } - } + } } return null; } @Override - public Comparator comparator() { - try { - if (termsEnum == null) { - return null; - } else { - return termsEnum.getComparator(); - } - } catch (IOException e) { - throw new RuntimeException(e); + public Comparator getComparator() { + if (termsEnum == null) { + return null; + } else { + return termsEnum.getComparator(); } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java index bd4afcc..ee835e6 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java @@ -43,17 +43,12 @@ public class LuceneDictionary implements Dictionary { this.field = 
field; } - public final BytesRefIterator getWordsIterator() { - - try { - final Terms terms = MultiFields.getTerms(reader, field); - if (terms != null) { - return terms.iterator(null); - } else { - return BytesRefIterator.EMPTY_ITERATOR; - } - } catch (IOException e) { - throw new RuntimeException(e); + public final BytesRefIterator getWordsIterator() throws IOException { + final Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + return terms.iterator(null); + } else { + return BytesRefIterator.EMPTY_ITERATOR; } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java index 39b1b0e..2be996f 100755 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java @@ -18,7 +18,7 @@ package org.apache.lucene.search.spell; */ -import java.util.Iterator; +import java.util.Comparator; import java.io.*; import org.apache.lucene.util.BytesRef; @@ -53,7 +53,7 @@ public class PlainTextDictionary implements Dictionary { in = new BufferedReader(reader); } - public BytesRefIterator getWordsIterator() { + public BytesRefIterator getWordsIterator() throws IOException { return new FileIterator(); } @@ -85,6 +85,11 @@ public class PlainTextDictionary implements Dictionary { } return result; } + + @Override + public Comparator getComparator() { + return null; + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java b/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java deleted file mode 100644 index 694e0ca..0000000 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/SortedIterator.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.apache.lucene.search.spell; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license 
agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Comparator; -import java.util.Iterator; - -import org.apache.lucene.util.BytesRef; - -/** - * Marker interface to signal that elements coming from {@link Iterator} - * come in ascending lexicographic order. - */ -public interface SortedIterator { - - public Comparator comparator(); - -} diff --git a/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java b/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java index 4a6d431..64df768 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java +++ b/modules/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java @@ -18,12 +18,14 @@ package org.apache.lucene.search.spell; */ import java.io.IOException; +import java.util.Comparator; + import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; public interface TermFreqIterator extends BytesRefIterator { - public float freq(); + public long weight(); public static class TermFreqIteratorWrapper implements TermFreqIterator { private BytesRefIterator wrapped; @@ -32,12 +34,17 @@ public interface TermFreqIterator extends BytesRefIterator { this.wrapped = wrapped; } - public float freq() { - return 1.0f; + public long weight() { + return 1; 
} public BytesRef next() throws IOException { return wrapped.next(); } + + @Override + public Comparator getComparator() { + return wrapped.getComparator(); + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java index a98f4a5..12c89e0 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java @@ -18,7 +18,7 @@ package org.apache.lucene.search.suggest; */ import java.io.IOException; - +import java.util.Comparator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -27,12 +27,14 @@ import org.apache.lucene.util.BytesRef; * This wrapper buffers incoming elements. */ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { - + // TODO keep this for now protected BytesRefList entries = new BytesRefList(); protected int curPos = -1; - protected float[] freqs = new float[1]; + protected long[] freqs = new long[1]; private final BytesRef spare = new BytesRef(); + private final Comparator comp; public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + this.comp = source.getComparator(); BytesRef spare; int freqIndex = 0; while((spare = source.next()) != null) { @@ -40,12 +42,12 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { if (freqIndex >= freqs.length) { freqs = ArrayUtil.grow(freqs, freqs.length+1); } - freqs[freqIndex++] = source.freq(); + freqs[freqIndex++] = source.weight(); } } - public float freq() { + public long weight() { return freqs[curPos]; } @@ -58,5 +60,10 @@ public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { return null; } + @Override + public Comparator 
getComparator() { + return comp; + } + } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java index bbd0bfa..8925cca 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Comparator; + import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; @@ -83,6 +84,11 @@ final class BytesRefList { } return null; } + + @Override + public Comparator getComparator() { + return null; + } }; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index 15e833f..059e1c2 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.*; +import java.util.Comparator; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.TermFreqIterator; @@ -56,11 +57,11 @@ public class FileDictionary implements Dictionary { } final class FileIterator implements TermFreqIterator { - private float curFreq; + private long curFreq; private final BytesRef spare = new BytesRef(); - public float freq() { + public long weight() { return curFreq; } @@ -73,7 +74,8 @@ public class FileDictionary implements Dictionary { if (line != null) { String[] fields = line.split("\t"); if (fields.length > 1) { - curFreq = Float.parseFloat(fields[1]); + // keep reading floats for bw compat; cast to long to match weight() + curFreq = (long)Float.parseFloat(fields[1]); spare.copyChars(fields[0]); } else { spare.copyChars(line); 
@@ -86,6 +88,11 @@ public class FileDictionary implements Dictionary { return null; } } + + @Override + public Comparator getComparator() { + return null; + } } } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index 1ab0eac..046a92c 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; @@ -33,10 +34,10 @@ public abstract class Lookup { * Result of a lookup. */ public static final class LookupResult implements Comparable { - public final String key; + public final CharSequence key; public final float value; - public LookupResult(String key, float value) { + public LookupResult(CharSequence key, float value) { this.key = key; this.value = value; } @@ -48,10 +49,37 @@ public abstract class Lookup { /** Compare alphabetically. 
*/ public int compareTo(LookupResult o) { - return this.key.compareTo(o.key); + return CHARSEQUENCE_COMPARATOR.compare(key, o.key); } } + public static final Comparator<CharSequence> CHARSEQUENCE_COMPARATOR = new CharSequenceComparator(); + + private static class CharSequenceComparator implements Comparator<CharSequence> { + + @Override + public int compare(CharSequence o1, CharSequence o2) { + final int l1 = o1.length(); + final int l2 = o2.length(); + + final int aStop; + if (l1 < l2) { + aStop = l1; + } else { + aStop = l2; + } + for (int i = 0; i < aStop; i++) { + int diff = o1.charAt(i) - o2.charAt(i); + if (diff != 0) { + return diff; + } + } + // One is a prefix of the other, or, they are equal: + return l1 - l2; + } + + } + public static final class LookupPriorityQueue extends PriorityQueue { public LookupPriorityQueue(int size) { @@ -99,8 +127,7 @@ public abstract class Lookup { * @param num maximum number of results to return * @return a list of possible completions, with their relative weight (e.g. popularity) */ - // TODO: this should be a BytesRef API? - public abstract List lookup(String key, boolean onlyMorePopular, int num); + public abstract List lookup(CharSequence key, boolean onlyMorePopular, int num); /** * Modify the lookup data by recording additional data. Optional operation. @@ -109,16 +136,14 @@ public abstract class Lookup { * @return true if new key is added, false if it already exists or operation * is not supported. */ - // TODO: this should be a BytesRef API? - public abstract boolean add(String key, Object value); + public abstract boolean add(CharSequence key, Object value); /** * Get value associated with a specific key. * @param key lookup key * @return associated value */ - // TODO: this should be a BytesRef API? - public abstract Object get(String key); + public abstract Object get(CharSequence key); /** * Persist the constructed lookup data to a directory. Optional operation. 
diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java index ffa4f9b..2380724 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java @@ -20,7 +20,6 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Comparator; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRef; @@ -28,13 +27,12 @@ import org.apache.lucene.util.BytesRef; * This wrapper buffers incoming elements and makes sure they are sorted in * ascending lexicographic order. */ -public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper implements SortedIterator { - +public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { + // TODO keep this for now - but the consumer should really sort this stuff on disk with sorter... 
private final int[] sortedOrds; private int currentOrd = -1; private final BytesRef spare = new BytesRef(); private final Comparator comp; - public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comp) throws IOException { super(source); @@ -43,7 +41,7 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap } @Override - public float freq() { + public long weight() { return freqs[currentOrd]; } @@ -56,9 +54,8 @@ public class SortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrap } @Override - public Comparator comparator() { + public Comparator getComparator() { return comp; } - } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java index d7b1b60..4a7e3d8 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRef; * random order. 
*/ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { - + // TODO keep this for now private final int[] ords; private int currentOrd = -1; private final BytesRef spare = new BytesRef(); @@ -48,7 +48,7 @@ public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWr } @Override - public float freq() { + public long weight() { return freqs[currentOrd]; } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java index 23a8df2..59fdc4c 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java @@ -199,7 +199,7 @@ public class FSTCompletion { * @return Returns the suggestions, sorted by their approximated weight first * (decreasing) and then alphabetically (UTF-8 codepoint order). */ - public List lookup(String key, int num) { + public List lookup(CharSequence key, int num) { if (key.length() == 0 || automaton == null) { return EMPTY_RESULT; } @@ -388,7 +388,7 @@ public class FSTCompletion { * Returns the bucket assigned to a given key (if found) or null if * no exact match exists. */ - public Integer getBucket(String key) { + public Integer getBucket(CharSequence key) { return getExactMatchStartingFromRootArc(0, new BytesRef(key)); } diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 4de0d00..aee2ea1 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -41,7 +41,7 @@ import org.apache.lucene.util.fst.NoOutputs; * An adapter from {@link Lookup} API to {@link FSTCompletion}. * *

This adapter differs from {@link FSTCompletion} in that it attempts - * to discretize any "weights" as passed from in {@link TermFreqIterator#freq()} + * to discretize any "weights" as passed in from {@link TermFreqIterator#weight()} * to match the number of buckets. For the rationale for bucketing, see * {@link FSTCompletion}. * @@ -171,7 +171,7 @@ public class FSTCompletionLookup extends Lookup { } output.reset(buffer); - output.writeInt(FloatMagic.toSortable(tfit.freq())); + output.writeInt(FloatMagic.toSortable(tfit.weight())); output.writeBytes(spare.bytes, spare.offset, spare.length); writer.write(buffer, 0, output.getPosition()); } @@ -232,7 +232,7 @@ public class FSTCompletionLookup extends Lookup { } @Override - public List lookup(String key, boolean higherWeightsFirst, int num) { + public List lookup(CharSequence key, boolean higherWeightsFirst, int num) { final List completions; if (higherWeightsFirst) { completions = higherWeightsCompletion.lookup(key, num); @@ -241,20 +241,23 @@ public class FSTCompletionLookup extends Lookup { } final ArrayList results = new ArrayList(completions.size()); + CharsRef spare = new CharsRef(); for (Completion c : completions) { - results.add(new LookupResult(c.utf8.utf8ToString(), c.bucket)); + spare.grow(c.utf8.length); + UnicodeUtil.UTF8toUTF16(c.utf8, spare); + results.add(new LookupResult(spare.toString(), c.bucket)); } return results; } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { // Not supported. 
return false; } @Override - public Float get(String key) { + public Object get(CharSequence key) { Integer bucket = normalCompletion.getBucket(key); if (bucket == null) return null; diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index 882b133..d286e48 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -33,8 +33,10 @@ import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST.Arc; @@ -121,7 +123,7 @@ public class WFSTCompletionLookup extends Lookup { output.reset(buffer); output.writeBytes(spare.bytes, spare.offset, spare.length); output.writeByte((byte)0); // separator: not used, just for sort order - output.writeInt((int)encodeWeight(iterator.freq())); + output.writeInt((int)encodeWeight(iterator.weight())); writer.write(buffer, 0, output.getPosition()); } writer.close(); @@ -200,7 +202,7 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { assert num > 0; BytesRef scratch = new BytesRef(key); int prefixLength = scratch.length; @@ -217,8 +219,11 @@ public class WFSTCompletionLookup extends Lookup { } List results = new ArrayList(num); + CharsRef spare = new CharsRef(); if (exactFirst && arc.isFinal()) { - 
results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput))); + spare.grow(scratch.length); + UnicodeUtil.UTF8toUTF16(scratch, spare); + results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + arc.nextFinalOutput))); if (--num == 0) { return results; // that was quick } @@ -236,8 +241,9 @@ public class WFSTCompletionLookup extends Lookup { // append suffix Util.toBytesRef(completion.input, suffix); scratch.append(suffix); - - results.add(new LookupResult(scratch.utf8ToString(), decodeWeight(prefixOutput + completion.output))); + spare.grow(scratch.length); + UnicodeUtil.UTF8toUTF16(scratch, spare); + results.add(new LookupResult(spare.toString(), decodeWeight(prefixOutput + completion.output))); } return results; } @@ -264,7 +270,7 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { return false; // Not supported. } @@ -273,7 +279,7 @@ public class WFSTCompletionLookup extends Lookup { * or null if it does not exist. 
*/ @Override - public Float get(String key) { + public Object get(CharSequence key) { Arc arc = new Arc(); Long result = null; try { diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 82c2883..56a4751 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -28,7 +28,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; @@ -45,7 +44,7 @@ public class JaspellLookup extends Lookup { @Override public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof SortedIterator) { + if (tfit.getComparator() != null) { // make sure it's unsorted // WTF - this could result in yet another sorted iteration.... 
tfit = new UnsortedTermFreqIteratorWrapper(tfit); @@ -56,7 +55,7 @@ public class JaspellLookup extends Lookup { final CharsRef charsSpare = new CharsRef(); while ((spare = tfit.next()) != null) { - float freq = tfit.freq(); + float freq = tfit.weight(); if (spare.length == 0) { continue; } @@ -67,19 +66,19 @@ public class JaspellLookup extends Lookup { } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { trie.put(key, value); // XXX return false; } @Override - public Object get(String key) { + public Object get(CharSequence key) { return trie.get(key); } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { List res = new ArrayList(); List list; int count = onlyMorePopular ? num * 2 : num; @@ -97,7 +96,7 @@ public class JaspellLookup extends Lookup { LookupPriorityQueue queue = new LookupPriorityQueue(num); for (String s : list) { float freq = (Float)trie.get(s); - queue.insertWithOverflow(new LookupResult(s, freq)); + queue.insertWithOverflow(new LookupResult(new CharsRef(s), freq)); } for (LookupResult lr : queue.getResults()) { res.add(lr); @@ -106,7 +105,7 @@ public class JaspellLookup extends Lookup { for (int i = 0; i < maxCnt; i++) { String s = list.get(i); float freq = (Float)trie.get(s); - res.add(new LookupResult(s, freq)); + res.add(new LookupResult(new CharsRef(s), freq)); } } return res; diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java index 3402575..d07c5e8 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellTernarySearchTrie.java @@ -368,8 +368,8 @@ public class JaspellTernarySearchTrie { * A String 
index. *@return The object retrieved from the Ternary Search Trie. */ - public Object get(String key) { - TSTNode node = getNode(key.trim().toLowerCase()); + public Object get(CharSequence key) { + TSTNode node = getNode(key); if (node == null) { return null; } @@ -435,7 +435,7 @@ public class JaspellTernarySearchTrie { *@return The node object indexed by key. This object is an instance of an * inner class named TernarySearchTrie.TSTNode. */ - public TSTNode getNode(String key) { + public TSTNode getNode(CharSequence key) { return getNode(key, rootNode); } @@ -450,8 +450,7 @@ public class JaspellTernarySearchTrie { *@return The node object indexed by key. This object is an instance of an * inner class named TernarySearchTrie.TSTNode. */ - protected TSTNode getNode(String key2, TSTNode startNode) { - String key = key2.trim().toLowerCase(); + protected TSTNode getNode(CharSequence key, TSTNode startNode) { if (key == null || startNode == null || key.length() == 0) { return null; } @@ -490,7 +489,7 @@ public class JaspellTernarySearchTrie { *@exception IllegalArgumentException * If the key is an empty String. */ - protected TSTNode getOrCreateNode(String key) throws NullPointerException, + protected TSTNode getOrCreateNode(CharSequence key) throws NullPointerException, IllegalArgumentException { if (key == null) { throw new NullPointerException( @@ -568,7 +567,7 @@ public class JaspellTernarySearchTrie { * The maximum number of values returned by this method. *@return A List with the results */ - public List matchAlmost(String key, int numReturnValues) { + public List matchAlmost(CharSequence key, int numReturnValues) { return matchAlmostRecursion(rootNode, 0, matchAlmostDiff, key, ((numReturnValues < 0) ? -1 : numReturnValues), new Vector(), false); } @@ -598,7 +597,7 @@ public class JaspellTernarySearchTrie { *@return A List with the results. 
*/ private List matchAlmostRecursion(TSTNode currentNode, int charIndex, - int d, String matchAlmostKey, int matchAlmostNumReturnValues, + int d, CharSequence matchAlmostKey, int matchAlmostNumReturnValues, List matchAlmostResult2, boolean upTo) { if ((currentNode == null) || (matchAlmostNumReturnValues != -1 && matchAlmostResult2.size() >= matchAlmostNumReturnValues) @@ -658,7 +657,7 @@ public class JaspellTernarySearchTrie { * The maximum number of values returned from this method. *@return A List with the results */ - public List matchPrefix(String prefix, int numReturnValues) { + public List matchPrefix(CharSequence prefix, int numReturnValues) { Vector sortKeysResult = new Vector(); TSTNode startNode = getNode(prefix); if (startNode == null) { @@ -722,8 +721,8 @@ public class JaspellTernarySearchTrie { *@param value * The object to be stored in the Trie. */ - public void put(String key, Object value) { - getOrCreateNode(key.trim().toLowerCase()).data = value; + public void put(CharSequence key, Object value) { + getOrCreateNode(key).data = value; } /** diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java index ce44090..4c6da40 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTAutocomplete.java @@ -57,7 +57,7 @@ public class TSTAutocomplete { * index of character in key to be inserted currently. 
* @return currentNode The new reference to root node of TST */ - public TernaryTreeNode insert(TernaryTreeNode currentNode, String s, + public TernaryTreeNode insert(TernaryTreeNode currentNode, CharSequence s, Object val, int x) { if (s == null || s.length() <= x) { return currentNode; @@ -69,7 +69,7 @@ public class TSTAutocomplete { if (x < s.length() - 1) { currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); } else { - currentNode.token = s; + currentNode.token = s.toString(); currentNode.val = val; return currentNode; } @@ -79,7 +79,7 @@ public class TSTAutocomplete { if (x < s.length() - 1) { currentNode.eqKid = insert(currentNode.eqKid, s, val, x + 1); } else { - currentNode.token = s; + currentNode.token = s.toString(); currentNode.val = val; return currentNode; } @@ -104,7 +104,7 @@ public class TSTAutocomplete { * @return suggest list of auto-completed keys for the given prefix query. */ public ArrayList prefixCompletion(TernaryTreeNode root, - String s, int x) { + CharSequence s, int x) { TernaryTreeNode p = root; ArrayList suggest = new ArrayList(); diff --git a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 01fe8b6..56b00a3 100644 --- a/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ b/modules/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -30,7 +30,6 @@ import java.util.List; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; @@ -45,7 +44,7 @@ public class TSTLookup extends Lookup { public void build(TermFreqIterator tfit) throws IOException { root = new TernaryTreeNode(); // buffer first - if ((!(tfit instanceof 
SortedIterator)) || ((SortedIterator)tfit).comparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { + if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { // make sure it's sorted and the comparator uses UTF16 sort order tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); } @@ -58,34 +57,47 @@ public class TSTLookup extends Lookup { charsSpare.grow(spare.length); UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare); tokens.add(charsSpare.toString()); - vals.add(new Float(tfit.freq())); + vals.add(new Float(tfit.weight())); } autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root); } @Override - public boolean add(String key, Object value) { + public boolean add(CharSequence key, Object value) { autocomplete.insert(root, key, value, 0); // XXX we don't know if a new node was created return true; } @Override - public Object get(String key) { + public Object get(CharSequence key) { List list = autocomplete.prefixCompletion(root, key, 0); if (list == null || list.isEmpty()) { return null; } for (TernaryTreeNode n : list) { - if (n.token.equals(key)) { + if (charSeqEquals(n.token, key)) { return n.val; } } return null; } + + private static boolean charSeqEquals(CharSequence left, CharSequence right) { + int len = left.length(); + if (len != right.length()) { + return false; + } + for (int i = 0; i < len; i++) { + if (left.charAt(i) != right.charAt(i)) { + return false; + } + } + return true; + } @Override - public List lookup(String key, boolean onlyMorePopular, int num) { + public List lookup(CharSequence key, boolean onlyMorePopular, int num) { List list = autocomplete.prefixCompletion(root, key, 0); List res = new ArrayList(); if (list == null || list.size() == 0) { diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index 
0d56526..92d2585 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -97,7 +97,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { while ((line = br.readLine()) != null) { int tab = line.indexOf('|'); assertTrue("No | separator?: " + line, tab >= 0); - float weight = Float.parseFloat(line.substring(tab + 1)); + int weight = Integer.parseInt(line.substring(tab + 1)); String key = line.substring(0, tab); input.add(new TermFreq(key, weight)); } @@ -199,7 +199,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { public Integer call() throws Exception { int v = 0; for (String term : input) { - v += lookup.lookup(term, onlyMorePopular, num).size(); + v += lookup.lookup(new CharsRef(term), onlyMorePopular, num).size(); } return v; } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java index c4ab93e..a2deec4 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.suggest.fst.FSTCompletionLookup; import org.apache.lucene.search.suggest.jaspell.JaspellLookup; import org.apache.lucene.search.suggest.tst.TSTLookup; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class PersistenceTest extends LuceneTestCase { public final String[] keys = new String[] { @@ -61,7 +62,7 @@ public class PersistenceTest extends LuceneTestCase { Lookup lookup = lookupClass.newInstance(); TermFreq[] keys = new TermFreq[this.keys.length]; for (int i = 0; i < keys.length; i++) - keys[i] = new TermFreq(this.keys[i], (float) i); + keys[i] = new TermFreq(this.keys[i], i); lookup.build(new TermFreqArrayIterator(keys)); // 
Store the suggester. @@ -75,7 +76,7 @@ public class PersistenceTest extends LuceneTestCase { // Assert validity. float previous = Float.NEGATIVE_INFINITY; for (TermFreq k : keys) { - Float val = (Float) lookup.get(k.term.utf8ToString()); + Float val = (Float) lookup.get(_TestUtil.bytesToCharSequence(k.term, random)); assertNotNull(k.term.utf8ToString(), val); if (supportsExactWeights) { diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java index 29d0433..49b346b 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java @@ -21,13 +21,13 @@ import org.apache.lucene.util.BytesRef; public final class TermFreq { public final BytesRef term; - public final float v; + public final long v; - public TermFreq(String term, float v) { + public TermFreq(String term, long v) { this(new BytesRef(term), v); } - public TermFreq(BytesRef term, float v) { + public TermFreq(BytesRef term, long v) { this.term = term; this.v = v; } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java index 27cbb66..1abf941 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest; import java.io.IOException; import java.util.Arrays; +import java.util.Comparator; import java.util.Iterator; import org.apache.lucene.search.spell.TermFreqIterator; @@ -44,7 +45,7 @@ public final class TermFreqArrayIterator implements TermFreqIterator { this(i.iterator()); } - public float freq() { + public long weight() { return current.v; } @@ -57,4 +58,9 @@ public final class TermFreqArrayIterator 
implements TermFreqIterator { } return null; } + + @Override + public Comparator getComparator() { + return null; + } } \ No newline at end of file diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java index 13a4a4e..8195281 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefList.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; + +import org.apache.lucene.search.suggest.BytesRefList; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.LuceneTestCase; diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java index dc46eb6..71e479c 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestHighFrequencyDictionary.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.HighFrequencyDictionary; -import org.apache.lucene.search.spell.SortedIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.LuceneTestCase; @@ -37,8 +36,7 @@ public class TestHighFrequencyDictionary extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f); BytesRefIterator tf = dictionary.getWordsIterator(); - assertTrue(tf instanceof SortedIterator); - 
((SortedIterator)tf).comparator(); + assertNull(tf.getComparator()); assertNull(tf.next()); dir.close(); } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java index f915afc..6e74bc2 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java @@ -38,7 +38,7 @@ public class TestTermFreqIterator extends LuceneTestCase { public void testTerms() throws Exception { int num = atLeast(10000); - TreeMap sorted = new TreeMap(); + TreeMap sorted = new TreeMap(); TermFreq[] unsorted = new TermFreq[num]; for (int i = 0; i < num; i++) { @@ -46,28 +46,28 @@ public class TestTermFreqIterator extends LuceneTestCase { do { key = new BytesRef(_TestUtil.randomUnicodeString(random)); } while (sorted.containsKey(key)); - float value = random.nextFloat(); + long value = random.nextLong(); sorted.put(key, value); unsorted[i] = new TermFreq(key, value); } // test the sorted iterator wrapper TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), BytesRef.getUTF8SortedAsUnicodeComparator()); - Iterator> expected = sorted.entrySet().iterator(); + Iterator> expected = sorted.entrySet().iterator(); while (expected.hasNext()) { - Map.Entry entry = expected.next(); + Map.Entry entry = expected.next(); assertEquals(entry.getKey(), wrapper.next()); - assertEquals(entry.getValue().floatValue(), wrapper.freq(), 0F); + assertEquals(entry.getValue().longValue(), wrapper.weight(), 0F); } assertNull(wrapper.next()); // test the unsorted iterator wrapper wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted)); - TreeMap actual = new TreeMap(); + TreeMap actual = new TreeMap(); BytesRef key; while ((key = wrapper.next()) != null) { - float value = wrapper.freq(); + long value = 
wrapper.weight(); actual.put(BytesRef.deepCopyOf(key), value); } assertEquals(sorted, actual); diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index 354b843..746c01f 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -28,7 +28,7 @@ import org.apache.lucene.util.*; * Unit tests for {@link FSTCompletion}. */ public class FSTCompletionTest extends LuceneTestCase { - public static TermFreq tf(String t, float v) { + public static TermFreq tf(String t, int v) { return new TermFreq(t, v); } @@ -62,28 +62,28 @@ public class FSTCompletionTest extends LuceneTestCase { tf("foundation", 1), tf("fourblah", 1), tf("fourteen", 1), - tf("four", 0f), - tf("fourier", 0f), - tf("fourty", 0f), + tf("four", 0), + tf("fourier", 0), + tf("fourty", 0), tf("xo", 1), }; return keys; } public void testExactMatchHighPriority() throws Exception { - assertMatchEquals(completion.lookup("two", 1), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("two", random), 1), "two/1.0"); } public void testExactMatchLowPriority() throws Exception { - assertMatchEquals(completion.lookup("one", 2), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2), "one/0.0", "oneness/1.0"); } public void testExactMatchReordering() throws Exception { // Check reordering of exact matches. - assertMatchEquals(completion.lookup("four", 4), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4), "four/0.0", "fourblah/1.0", "fourteen/1.0", @@ -92,49 +92,49 @@ public class FSTCompletionTest extends LuceneTestCase { public void testRequestedCount() throws Exception { // 'one' is promoted after collecting two higher ranking results. 
- assertMatchEquals(completion.lookup("one", 2), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), 2), "one/0.0", "oneness/1.0"); // 'four' is collected in a bucket and then again as an exact match. - assertMatchEquals(completion.lookup("four", 2), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 2), "four/0.0", "fourblah/1.0"); // Check reordering of exact matches. - assertMatchEquals(completion.lookup("four", 4), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("four", random), 4), "four/0.0", "fourblah/1.0", "fourteen/1.0", "fourier/0.0"); // 'one' is at the top after collecting all alphabetical results. - assertMatchEquals(completionAlphabetical.lookup("one", 2), + assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2), "one/0.0", "oneness/1.0"); // 'one' is not promoted after collecting two higher ranking results. FSTCompletion noPromotion = new FSTCompletion(completion.getFST(), true, false); - assertMatchEquals(noPromotion.lookup("one", 2), + assertMatchEquals(noPromotion.lookup(_TestUtil.stringToCharSequence("one", random), 2), "oneness/1.0", "onerous/1.0"); // 'one' is at the top after collecting all alphabetical results. 
- assertMatchEquals(completionAlphabetical.lookup("one", 2), + assertMatchEquals(completionAlphabetical.lookup(_TestUtil.stringToCharSequence("one", random), 2), "one/0.0", "oneness/1.0"); } public void testMiss() throws Exception { - assertMatchEquals(completion.lookup("xyz", 1)); + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("xyz", random), 1)); } public void testAlphabeticWithWeights() throws Exception { - assertEquals(0, completionAlphabetical.lookup("xyz", 1).size()); + assertEquals(0, completionAlphabetical.lookup(_TestUtil.stringToCharSequence("xyz", random), 1).size()); } public void testFullMatchList() throws Exception { - assertMatchEquals(completion.lookup("one", Integer.MAX_VALUE), + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("one", random), Integer.MAX_VALUE), "oneness/1.0", "onerous/1.0", "onesimus/1.0", @@ -148,7 +148,7 @@ public class FSTCompletionTest extends LuceneTestCase { builder.add(new BytesRef(key), 0); FSTCompletion lookup = builder.build(); - List result = lookup.lookup(key, 1); + List result = lookup.lookup(_TestUtil.stringToCharSequence(key, random), 1); assertEquals(1, result.size()); } @@ -158,7 +158,7 @@ public class FSTCompletionTest extends LuceneTestCase { Random r = random; List keys = new ArrayList(); for (int i = 0; i < 5000; i++) { - keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1.0f)); + keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1)); } lookup.build(new TermFreqArrayIterator(keys)); @@ -167,7 +167,7 @@ public class FSTCompletionTest extends LuceneTestCase { // are. 
Float previous = null; for (TermFreq tf : keys) { - Float current = lookup.get(tf.term.utf8ToString()); + Float current = (Float)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)); if (previous != null) { assertEquals(previous, current); } @@ -181,13 +181,14 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(); lookup.build(new TermFreqArrayIterator(input)); - + CharsRef scratch = new CharsRef(); for (TermFreq tf : input) { - assertTrue("Not found: " + tf.term, lookup.get(tf.term.utf8ToString()) != null); - assertEquals(tf.term, lookup.lookup(tf.term.utf8ToString(), true, 1).get(0).key); + scratch.grow(tf.term.length); + assertTrue("Not found: " + tf.term, lookup.get(_TestUtil.bytesToCharSequence(tf.term, random)) != null); + assertEquals(tf.term, lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random), true, 1).get(0).key); } - List result = lookup.lookup("wit", true, 5); + List result = lookup.lookup(_TestUtil.stringToCharSequence("wit", random), true, 5); assertEquals(5, result.size()); assertTrue(result.get(0).key.equals("wit")); // exact match. assertTrue(result.get(1).key.equals("with")); // highest count. 
@@ -195,7 +196,7 @@ public class FSTCompletionTest extends LuceneTestCase { public void testEmptyInput() throws Exception { completion = new FSTCompletionBuilder().build(); - assertMatchEquals(completion.lookup("", 10)); + assertMatchEquals(completion.lookup(_TestUtil.stringToCharSequence("", random), 10)); } @Nightly @@ -203,7 +204,7 @@ public class FSTCompletionTest extends LuceneTestCase { List freqs = new ArrayList(); Random rnd = random; for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { - float weight = rnd.nextFloat() * 100; + int weight = random.nextInt(100); freqs.add(new TermFreq("" + rnd.nextLong(), weight)); } @@ -214,8 +215,8 @@ public class FSTCompletionTest extends LuceneTestCase { final String term = tf.term.utf8ToString(); for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); - for (LookupResult lr : lookup.lookup(prefix, true, 10)) { - assertTrue(lr.key.startsWith(prefix)); + for (LookupResult lr : lookup.lookup(_TestUtil.stringToCharSequence(prefix, random), true, 10)) { + assertTrue(lr.key.toString().startsWith(prefix)); } } } diff --git a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java index d0537ee..916eeb9 100644 --- a/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ b/modules/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -45,33 +45,33 @@ public class WFSTCompletionTest extends LuceneTestCase { suggester.build(new TermFreqArrayIterator(keys)); // top N of 2, but only foo is available - List results = suggester.lookup("f", false, 2); + List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2); assertEquals(1, results.size()); - assertEquals("foo", results.get(0).key); + assertEquals("foo", results.get(0).key.toString()); assertEquals(50, results.get(0).value, 0.01F); // top N of 1 for 
'bar': we return this even though barbar is higher - results = suggester.lookup("bar", false, 1); + results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1); assertEquals(1, results.size()); - assertEquals("bar", results.get(0).key); + assertEquals("bar", results.get(0).key.toString()); assertEquals(10, results.get(0).value, 0.01F); // top N Of 2 for 'b' - results = suggester.lookup("b", false, 2); + results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2); assertEquals(2, results.size()); - assertEquals("barbar", results.get(0).key); + assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); - assertEquals("bar", results.get(1).key); + assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); // top N of 3 for 'ba' - results = suggester.lookup("ba", false, 3); + results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3); assertEquals(3, results.size()); - assertEquals("barbar", results.get(0).key); + assertEquals("barbar", results.get(0).key.toString()); assertEquals(12, results.get(0).value, 0.01F); - assertEquals("bar", results.get(1).key); + assertEquals("bar", results.get(1).key.toString()); assertEquals(10, results.get(1).value, 0.01F); - assertEquals("barbara", results.get(2).key); + assertEquals("barbara", results.get(2).key.toString()); assertEquals(6, results.get(2).value, 0.01F); } @@ -100,7 +100,7 @@ public class WFSTCompletionTest extends LuceneTestCase { // we can probably do Integer.MAX_VALUE here, but why worry. 
int weight = random.nextInt(1<<24); slowCompletor.put(s, (long)weight); - keys[i] = new TermFreq(s, (float) weight); + keys[i] = new TermFreq(s, weight); } WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); @@ -109,7 +109,7 @@ public class WFSTCompletionTest extends LuceneTestCase { for (String prefix : allPrefixes) { final int topN = _TestUtil.nextInt(random, 1, 10); - List r = suggester.lookup(prefix, false, topN); + List r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN); // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion final List matches = new ArrayList(); @@ -126,7 +126,7 @@ public class WFSTCompletionTest extends LuceneTestCase { public int compare(LookupResult left, LookupResult right) { int cmp = Float.compare(right.value, left.value); if (cmp == 0) { - return left.key.compareTo(right.key); + return left.compareTo(right); } else { return cmp; } @@ -140,7 +140,7 @@ public class WFSTCompletionTest extends LuceneTestCase { for(int hit=0;hit suggestions = lookup.lookup(term, + scratch.chars = t.buffer(); + scratch.offset = 0; + scratch.length = t.length(); + List suggestions = lookup.lookup(scratch, options.onlyMorePopular, options.count); if (suggestions == null) { continue; @@ -178,7 +182,7 @@ public class Suggester extends SolrSpellChecker { Collections.sort(suggestions); } for (LookupResult lr : suggestions) { - res.add(t, lr.key, ((Number)lr.value).intValue()); + res.add(t, lr.key.toString(), ((Number)lr.value).intValue()); } } return res;