diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 5882fdf..826ba28 100644 --- lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.BytesRef; @@ -59,7 +60,7 @@ public class HighFrequencyDictionary implements Dictionary { return new HighFrequencyIterator(); } - final class HighFrequencyIterator implements TermFreqPayloadIterator { + final class HighFrequencyIterator implements InputIterator { private final BytesRef spare = new BytesRef(); private final TermsEnum termsEnum; private int minNumDocs; diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java deleted file mode 100644 index e780db4..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.apache.lucene.search.spell; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs -import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs -import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs -import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefIterator; - -/** - * Interface for enumerating term,weight,payload triples; - * currently only {@link AnalyzingSuggester}, {@link - * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads. - */ -public interface TermFreqPayloadIterator extends BytesRefIterator { - - /** A term's weight, higher numbers mean better suggestions. */ - public long weight(); - - /** An arbitrary byte[] to record per suggestion. See - * {@link LookupResult#payload} to retrieve the payload - * for each suggestion. */ - public BytesRef payload(); - - /** Returns true if the iterator has payloads */ - public boolean hasPayloads(); - - /** - * Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights - * set to 1 and carries no payload - */ - public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { - private final BytesRefIterator wrapped; - - /** - * Creates a new wrapper, wrapping the specified iterator and - * specifying a weight value of 1 for all terms - * and nullifies associated payloads. 
- */ - public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) { - this.wrapped = wrapped; - } - - @Override - public long weight() { - return 1; - } - - @Override - public BytesRef next() throws IOException { - return wrapped.next(); - } - - @Override - public BytesRef payload() { - return null; - } - - @Override - public boolean hasPayloads() { - return false; - } - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java new file mode 100644 index 0000000..b9772fa --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java @@ -0,0 +1,88 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Counter; + +/** + * This wrapper buffers incoming elements. 
+ * @lucene.experimental + */ +public class BufferedInputIterator implements InputIterator { + // TODO keep this for now + /** buffered term entries */ + protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); + /** buffered payload entries */ + protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter()); + /** current buffer position */ + protected int curPos = -1; + /** buffered weights, parallel with {@link #entries} */ + protected long[] freqs = new long[1]; + private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); + private final boolean hasPayloads; + + /** Creates a new iterator, buffering entries from the specified iterator */ + public BufferedInputIterator(InputIterator source) throws IOException { + BytesRef spare; + int freqIndex = 0; + hasPayloads = source.hasPayloads(); + while((spare = source.next()) != null) { + entries.append(spare); + if (hasPayloads) { + payloads.append(source.payload()); + } + if (freqIndex >= freqs.length) { + freqs = ArrayUtil.grow(freqs, freqs.length+1); + } + freqs[freqIndex++] = source.weight(); + } + + } + + @Override + public long weight() { + return freqs[curPos]; + } + + @Override + public BytesRef next() throws IOException { + if (++curPos < entries.size()) { + entries.get(spare, curPos); + return spare; + } + return null; + } + + @Override + public BytesRef payload() { + if (hasPayloads && curPos < payloads.size()) { + return payloads.get(payloadSpare, curPos); + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java deleted file mode 100644 index b78ec0e..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java +++ /dev/null @@ 
-1,89 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.Counter; - -/** - * This wrapper buffers incoming elements. 
- * @lucene.experimental - */ -public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { - // TODO keep this for now - /** buffered term entries */ - protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); - /** buffered payload entries */ - protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter()); - /** current buffer position */ - protected int curPos = -1; - /** buffered weights, parallel with {@link #entries} */ - protected long[] freqs = new long[1]; - private final BytesRef spare = new BytesRef(); - private final BytesRef payloadSpare = new BytesRef(); - private final boolean hasPayloads; - - /** Creates a new iterator, buffering entries from the specified iterator */ - public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { - BytesRef spare; - int freqIndex = 0; - hasPayloads = source.hasPayloads(); - while((spare = source.next()) != null) { - entries.append(spare); - if (hasPayloads) { - payloads.append(source.payload()); - } - if (freqIndex >= freqs.length) { - freqs = ArrayUtil.grow(freqs, freqs.length+1); - } - freqs[freqIndex++] = source.weight(); - } - - } - - @Override - public long weight() { - return freqs[curPos]; - } - - @Override - public BytesRef next() throws IOException { - if (++curPos < entries.size()) { - entries.get(spare, curPos); - return spare; - } - return null; - } - - @Override - public BytesRef payload() { - if (hasPayloads && curPos < payloads.size()) { - return payloads.get(payloadSpare, curPos); - } - return null; - } - - @Override - public boolean hasPayloads() { - return hasPayloads; - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java index 425d9db..46c61eb 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java +++ 
lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java @@ -27,12 +27,6 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.StorableField; import org.apache.lucene.index.StoredDocument; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadoc -import org.apache.lucene.search.suggest.fst.FSTCompletionLookup; // javadoc -import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup; // javadoc -import org.apache.lucene.search.suggest.jaspell.JaspellLookup; // javadoc -import org.apache.lucene.search.suggest.tst.TSTLookup; // javadoc import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; @@ -83,10 +77,10 @@ public class DocumentDictionary implements Dictionary { @Override public BytesRefIterator getWordsIterator() throws IOException { - return new TermWeightPayloadIterator(payloadField!=null); + return new DocumentInputIterator(payloadField!=null); } - final class TermWeightPayloadIterator implements TermFreqPayloadIterator { + final class DocumentInputIterator implements InputIterator { private final int docCount; private final Set relevantFields; private final boolean hasPayloads; @@ -100,7 +94,7 @@ public class DocumentDictionary implements Dictionary { * index. setting withPayload to false, implies an iterator * over only term and weight. 
*/ - public TermWeightPayloadIterator(boolean hasPayloads) throws IOException { + public DocumentInputIterator(boolean hasPayloads) throws IOException { docCount = reader.maxDoc() - 1; this.hasPayloads = hasPayloads; currentPayload = null; diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index b03033b..16318b3 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -21,7 +21,6 @@ package org.apache.lucene.search.suggest; import java.io.*; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -57,11 +56,11 @@ public class FileDictionary implements Dictionary { } @Override - public TermFreqPayloadIterator getWordsIterator() { + public InputIterator getWordsIterator() { return new FileIterator(); } - final class FileIterator implements TermFreqPayloadIterator { + final class FileIterator implements InputIterator { private long curFreq; private final BytesRef spare = new BytesRef(); diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java new file mode 100644 index 0000000..bda1332 --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java @@ -0,0 +1,83 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs +import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs +import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs +import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; + +/** + * Interface for enumerating term,weight,payload triples for suggester consumption; + * currently only {@link AnalyzingSuggester}, {@link + * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads. + */ +public interface InputIterator extends BytesRefIterator { + + /** A term's weight, higher numbers mean better suggestions. */ + public long weight(); + + /** An arbitrary byte[] to record per suggestion. See + * {@link LookupResult#payload} to retrieve the payload + * for each suggestion. 
*/ + public BytesRef payload(); + + /** Returns true if the iterator has payloads */ + public boolean hasPayloads(); + + /** + * Wraps a BytesRefIterator as a suggester InputIterator, with all weights + * set to 1 and carries no payload + */ + public static class InputIteratorWrapper implements InputIterator { + private final BytesRefIterator wrapped; + + /** + * Creates a new wrapper, wrapping the specified iterator and + * specifying a weight value of 1 for all terms + * and nullifies associated payloads. + */ + public InputIteratorWrapper(BytesRefIterator wrapped) { + this.wrapped = wrapped; + } + + @Override + public long weight() { + return 1; + } + + @Override + public BytesRef next() throws IOException { + return wrapped.next(); + } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index edee62b..dd35d85 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -24,7 +24,6 @@ import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.PriorityQueue; @@ -154,25 +153,25 @@ public abstract class Lookup { /** Build lookup from a dictionary. Some implementations may require sorted * or unsorted keys from the dictionary's iterator - use - * {@link SortedTermFreqPayloadIteratorWrapper} or - * {@link UnsortedTermFreqPayloadIteratorWrapper} in such case. + * {@link SortedInputIterator} or + * {@link UnsortedInputIterator} in such case. 
*/ public void build(Dictionary dict) throws IOException { BytesRefIterator it = dict.getWordsIterator(); - TermFreqPayloadIterator tfit; - if (it instanceof TermFreqPayloadIterator) { - tfit = (TermFreqPayloadIterator)it; + InputIterator tfit; + if (it instanceof InputIterator) { + tfit = (InputIterator)it; } else { - tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it); + tfit = new InputIterator.InputIteratorWrapper(it); } build(tfit); } /** - * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}. + * Builds up a new internal {@link Lookup} representation based on the given {@link InputIterator}. * The implementation might re-sort the data internally. */ - public abstract void build(TermFreqPayloadIterator tfit) throws IOException; + public abstract void build(InputIterator tfit) throws IOException; /** * Look up a key and return possible completion for this key. diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java new file mode 100644 index 0000000..d804f38 --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java @@ -0,0 +1,226 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.search.suggest.Sort.ByteSequencesReader; +import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator. + * @lucene.experimental + */ +public class SortedInputIterator implements InputIterator { + + private final InputIterator source; + private File tempInput; + private File tempSorted; + private final ByteSequencesReader reader; + private final Comparator comparator; + private final boolean hasPayloads; + private boolean done = false; + + private long weight; + private final BytesRef scratch = new BytesRef(); + private BytesRef payload = new BytesRef(); + + /** + * Creates a new sorted wrapper, using {@link + * BytesRef#getUTF8SortedAsUnicodeComparator} for + * sorting. */ + public SortedInputIterator(InputIterator source) throws IOException { + this(source, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + /** + * Creates a new sorted wrapper, sorting by BytesRef + * (ascending) then cost (ascending). 
+ */ + public SortedInputIterator(InputIterator source, Comparator comparator) throws IOException { + this.hasPayloads = source.hasPayloads(); + this.source = source; + this.comparator = comparator; + this.reader = sort(); + } + + @Override + public BytesRef next() throws IOException { + boolean success = false; + if (done) { + return null; + } + try { + ByteArrayDataInput input = new ByteArrayDataInput(); + if (reader.read(scratch)) { + weight = decode(scratch, input); + if (hasPayloads) { + payload = decodePayload(scratch, input); + } + success = true; + return scratch; + } + close(); + success = done = true; + return null; + } finally { + if (!success) { + done = true; + close(); + } + } + } + + @Override + public long weight() { + return weight; + } + + @Override + public BytesRef payload() { + if (hasPayloads) { + return payload; + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } + + /** Sorts by BytesRef (ascending) then cost (ascending). 
*/ + private final Comparator tieBreakByCostComparator = new Comparator() { + + private final BytesRef leftScratch = new BytesRef(); + private final BytesRef rightScratch = new BytesRef(); + private final ByteArrayDataInput input = new ByteArrayDataInput(); + + @Override + public int compare(BytesRef left, BytesRef right) { + // Make shallow copy in case decode changes the BytesRef: + leftScratch.bytes = left.bytes; + leftScratch.offset = left.offset; + leftScratch.length = left.length; + rightScratch.bytes = right.bytes; + rightScratch.offset = right.offset; + rightScratch.length = right.length; + long leftCost = decode(leftScratch, input); + long rightCost = decode(rightScratch, input); + if (hasPayloads) { + decodePayload(leftScratch, input); + decodePayload(rightScratch, input); + } + int cmp = comparator.compare(leftScratch, rightScratch); + if (cmp != 0) { + return cmp; + } + return Long.compare(leftCost, rightCost); + } + }; + + private Sort.ByteSequencesReader sort() throws IOException { + String prefix = getClass().getSimpleName(); + File directory = Sort.defaultTempDir(); + tempInput = File.createTempFile(prefix, ".input", directory); + tempSorted = File.createTempFile(prefix, ".sorted", directory); + + final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); + boolean success = false; + try { + BytesRef spare; + byte[] buffer = new byte[0]; + ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); + + while ((spare = source.next()) != null) { + encode(writer, output, buffer, spare, source.payload(), source.weight()); + } + writer.close(); + new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted); + ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted); + success = true; + return reader; + + } finally { + if (success) { + IOUtils.close(writer); + } else { + try { + IOUtils.closeWhileHandlingException(writer); + } finally { + close(); + } + } + } + } + + private void close() throws IOException { + 
IOUtils.close(reader); + if (tempInput != null) { + tempInput.delete(); + } + if (tempSorted != null) { + tempSorted.delete(); + } + } + + /** encodes an entry (bytes+(payload)+weight) to the provided writer */ + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { + int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0); + if (requiredLength >= buffer.length) { + buffer = ArrayUtil.grow(buffer, requiredLength); + } + output.reset(buffer); + output.writeBytes(spare.bytes, spare.offset, spare.length); + if (hasPayloads) { + output.writeBytes(payload.bytes, payload.offset, payload.length); + output.writeShort((short) payload.length); + } + output.writeLong(weight); + writer.write(buffer, 0, output.getPosition()); + } + + /** decodes the weight at the current position */ + protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) { + tmpInput.reset(scratch.bytes); + tmpInput.skipBytes(scratch.length - 8); // suggestion + scratch.length -= 8; // long + return tmpInput.readLong(); + } + + /** decodes the payload at the current position */ + protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) { + tmpInput.reset(scratch.bytes); + tmpInput.skipBytes(scratch.length - 2); // skip to payload size + short payloadLength = tmpInput.readShort(); // read payload size + tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload + BytesRef payloadScratch = new BytesRef(payloadLength); + tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload + payloadScratch.length = payloadLength; + scratch.length -= 2; // payload length info (short) + scratch.length -= payloadLength; // payload + return payloadScratch; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java 
lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java deleted file mode 100644 index b8fa103..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java +++ /dev/null @@ -1,227 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.File; -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.search.suggest.Sort.ByteSequencesReader; -import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** - * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator. 
- * @lucene.experimental - */ -public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { - - private final TermFreqPayloadIterator source; - private File tempInput; - private File tempSorted; - private final ByteSequencesReader reader; - private final Comparator comparator; - private final boolean hasPayloads; - private boolean done = false; - - private long weight; - private final BytesRef scratch = new BytesRef(); - private BytesRef payload = new BytesRef(); - - /** - * Creates a new sorted wrapper, using {@link - * BytesRef#getUTF8SortedAsUnicodeComparator} for - * sorting. */ - public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { - this(source, BytesRef.getUTF8SortedAsUnicodeComparator()); - } - - /** - * Creates a new sorted wrapper, sorting by BytesRef - * (ascending) then cost (ascending). - */ - public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator comparator) throws IOException { - this.hasPayloads = source.hasPayloads(); - this.source = source; - this.comparator = comparator; - this.reader = sort(); - } - - @Override - public BytesRef next() throws IOException { - boolean success = false; - if (done) { - return null; - } - try { - ByteArrayDataInput input = new ByteArrayDataInput(); - if (reader.read(scratch)) { - weight = decode(scratch, input); - if (hasPayloads) { - payload = decodePayload(scratch, input); - } - success = true; - return scratch; - } - close(); - success = done = true; - return null; - } finally { - if (!success) { - done = true; - close(); - } - } - } - - @Override - public long weight() { - return weight; - } - - @Override - public BytesRef payload() { - if (hasPayloads) { - return payload; - } - return null; - } - - @Override - public boolean hasPayloads() { - return hasPayloads; - } - - /** Sortes by BytesRef (ascending) then cost (ascending). 
*/ - private final Comparator tieBreakByCostComparator = new Comparator() { - - private final BytesRef leftScratch = new BytesRef(); - private final BytesRef rightScratch = new BytesRef(); - private final ByteArrayDataInput input = new ByteArrayDataInput(); - - @Override - public int compare(BytesRef left, BytesRef right) { - // Make shallow copy in case decode changes the BytesRef: - leftScratch.bytes = left.bytes; - leftScratch.offset = left.offset; - leftScratch.length = left.length; - rightScratch.bytes = right.bytes; - rightScratch.offset = right.offset; - rightScratch.length = right.length; - long leftCost = decode(leftScratch, input); - long rightCost = decode(rightScratch, input); - if (hasPayloads) { - decodePayload(leftScratch, input); - decodePayload(rightScratch, input); - } - int cmp = comparator.compare(leftScratch, rightScratch); - if (cmp != 0) { - return cmp; - } - return Long.compare(leftCost, rightCost); - } - }; - - private Sort.ByteSequencesReader sort() throws IOException { - String prefix = getClass().getSimpleName(); - File directory = Sort.defaultTempDir(); - tempInput = File.createTempFile(prefix, ".input", directory); - tempSorted = File.createTempFile(prefix, ".sorted", directory); - - final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); - boolean success = false; - try { - BytesRef spare; - byte[] buffer = new byte[0]; - ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); - - while ((spare = source.next()) != null) { - encode(writer, output, buffer, spare, source.payload(), source.weight()); - } - writer.close(); - new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted); - ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted); - success = true; - return reader; - - } finally { - if (success) { - IOUtils.close(writer); - } else { - try { - IOUtils.closeWhileHandlingException(writer); - } finally { - close(); - } - } - } - } - - private void close() throws IOException { - 
IOUtils.close(reader); - if (tempInput != null) { - tempInput.delete(); - } - if (tempSorted != null) { - tempSorted.delete(); - } - } - - /** encodes an entry (bytes+(payload)+weight) to the provided writer */ - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { - int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0); - if (requiredLength >= buffer.length) { - buffer = ArrayUtil.grow(buffer, requiredLength); - } - output.reset(buffer); - output.writeBytes(spare.bytes, spare.offset, spare.length); - if (hasPayloads) { - output.writeBytes(payload.bytes, payload.offset, payload.length); - output.writeShort((short) payload.length); - } - output.writeLong(weight); - writer.write(buffer, 0, output.getPosition()); - } - - /** decodes the weight at the current position */ - protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) { - tmpInput.reset(scratch.bytes); - tmpInput.skipBytes(scratch.length - 8); // suggestion - scratch.length -= 8; // long - return tmpInput.readLong(); - } - - /** decodes the payload at the current position */ - protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) { - tmpInput.reset(scratch.bytes); - tmpInput.skipBytes(scratch.length - 2); // skip to payload size - short payloadLength = tmpInput.readShort(); // read payload size - tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload - BytesRef payloadScratch = new BytesRef(payloadLength); - tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload - payloadScratch.length = payloadLength; - scratch.length -= 2; // payload length info (short) - scratch.length -= payloadLength; // payload - return payloadScratch; - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java 
lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java new file mode 100644 index 0000000..4403fc1 --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java @@ -0,0 +1,78 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.util.BytesRef; + +/** + * This wrapper buffers the incoming elements and makes sure they are in + * random order. + * @lucene.experimental + */ +public class UnsortedInputIterator extends BufferedInputIterator { + // TODO keep this for now + private final int[] ords; + private int currentOrd = -1; + private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); + /** + * Creates a new iterator, wrapping the specified iterator and + * returning elements in a random order. 
+ */ + public UnsortedInputIterator(InputIterator source) throws IOException { + super(source); + ords = new int[entries.size()]; + Random random = new Random(); + for (int i = 0; i < ords.length; i++) { + ords[i] = i; + } + for (int i = 0; i < ords.length; i++) { + int randomPosition = random.nextInt(ords.length); + int temp = ords[i]; + ords[i] = ords[randomPosition]; + ords[randomPosition] = temp; + } + } + + @Override + public long weight() { + assert currentOrd == ords[curPos]; + return freqs[currentOrd]; + } + + @Override + public BytesRef next() throws IOException { + if (++curPos < entries.size()) { + currentOrd = ords[curPos]; + return entries.get(spare, currentOrd); + } + return null; + } + + @Override + public BytesRef payload() { + if (hasPayloads() && curPos < payloads.size()) { + assert currentOrd == ords[curPos]; + return payloads.get(payloadSpare, currentOrd); + } + return null; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java deleted file mode 100644 index 8aad73b..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Random; - -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.util.BytesRef; - -/** - * This wrapper buffers the incoming elements and makes sure they are in - * random order. - * @lucene.experimental - */ -public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper { - // TODO keep this for now - private final int[] ords; - private int currentOrd = -1; - private final BytesRef spare = new BytesRef(); - private final BytesRef payloadSpare = new BytesRef(); - /** - * Creates a new iterator, wrapping the specified iterator and - * returning elements in a random order. 
- */ - public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { - super(source); - ords = new int[entries.size()]; - Random random = new Random(); - for (int i = 0; i < ords.length; i++) { - ords[i] = i; - } - for (int i = 0; i < ords.length; i++) { - int randomPosition = random.nextInt(ords.length); - int temp = ords[i]; - ords[i] = ords[randomPosition]; - ords[randomPosition] = temp; - } - } - - @Override - public long weight() { - assert currentOrd == ords[curPos]; - return freqs[currentOrd]; - } - - @Override - public BytesRef next() throws IOException { - if (++curPos < entries.size()) { - currentOrd = ords[curPos]; - return entries.get(spare, currentOrd); - } - return null; - } - - @Override - public BytesRef payload() { - if (hasPayloads() && curPos < payloads.size()) { - assert currentOrd == ords[curPos]; - return payloads.get(payloadSpare, currentOrd); - } - return null; - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 27d73b8..7d388aa 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -65,8 +65,8 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -175,7 +175,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } @Override - 
public void build(TermFreqPayloadIterator iter) throws IOException { + public void build(InputIterator iter) throws IOException { if (searcher != null) { searcher.getIndexReader().close(); diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 0b6ff71..ee681c7 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -31,7 +31,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; import org.apache.lucene.store.ByteArrayDataInput; @@ -380,7 +380,7 @@ public class AnalyzingSuggester extends Lookup { } @Override - public void build(TermFreqPayloadIterator iterator) throws IOException { + public void build(InputIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); File directory = Sort.defaultTempDir(); File tempInput = File.createTempFile(prefix, ".input", directory); diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index cee929b..36c22a7 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -54,7 +54,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import 
org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; import org.apache.lucene.store.ByteArrayDataInput; @@ -273,14 +273,14 @@ public class FreeTextSuggester extends Lookup { } @Override - public void build(TermFreqPayloadIterator iterator) throws IOException { + public void build(InputIterator iterator) throws IOException { build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); } /** Build the suggest index, using up to the specified * amount of temporary RAM while building. Note that * the weights for the suggestions are ignored. */ - public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException { + public void build(InputIterator iterator, double ramBufferSizeMB) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("payloads are not supported"); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 2bc0aec..3dbf66d 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -24,7 +24,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.SortInfo; import org.apache.lucene.search.suggest.Sort; @@ -42,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs; * An adapter from {@link Lookup} API to {@link FSTCompletion}. * *

This adapter differs from {@link FSTCompletion} in that it attempts - * to discretize any "weights" as passed from in {@link TermFreqPayloadIterator#weight()} + * to discretize any "weights" as passed from in {@link InputIterator#weight()} * to match the number of buckets. For the rationale for bucketing, see * {@link FSTCompletion}. * @@ -95,7 +95,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqPayloadIterator)} method. The number of weight + * {@link #build(InputIterator)} method. The number of weight * discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and * exact matches are promoted to the top of the suggestions list. */ @@ -105,7 +105,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqPayloadIterator)} method. + * {@link #build(InputIterator)} method. * * @param buckets * The number of weight discretization buckets (see @@ -140,7 +140,7 @@ public class FSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqPayloadIterator tfit) throws IOException { + public void build(InputIterator tfit) throws IOException { if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index 982cab5..eb07356 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -25,10 +25,10 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import 
org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; -import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; +import org.apache.lucene.search.suggest.SortedInputIterator; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.InputStreamDataInput; @@ -92,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqPayloadIterator iterator) throws IOException { + public void build(InputIterator iterator) throws IOException { if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } BytesRef scratch = new BytesRef(); - TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator); + InputIterator iter = new WFSTInputIterator(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -254,9 +254,9 @@ public class WFSTCompletionLookup extends Lookup { return Integer.MAX_VALUE - (int)value; } - private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper { + private final class WFSTInputIterator extends SortedInputIterator { - WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException { + WFSTInputIterator(InputIterator source) throws IOException { super(source); assert source.hasPayloads() == false; } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 83ac512..0482e52 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -25,7 +25,7 @@ import java.io.OutputStream; 
import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; import org.apache.lucene.util.BytesRef; @@ -46,12 +46,12 @@ public class JaspellLookup extends Lookup { /** * Creates a new empty trie - * @see #build(TermFreqPayloadIterator) + * @see #build(InputIterator) * */ public JaspellLookup() {} @Override - public void build(TermFreqPayloadIterator tfit) throws IOException { + public void build(InputIterator tfit) throws IOException { if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 6eb173c..21ed3ad 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -25,9 +25,9 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; +import org.apache.lucene.search.suggest.SortedInputIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; @@ -45,19 +45,19 @@ public class TSTLookup extends Lookup { /** * Creates a new TSTLookup with an empty Ternary Search Tree. 
- * @see #build(TermFreqPayloadIterator) + * @see #build(InputIterator) */ public TSTLookup() {} @Override - public void build(TermFreqPayloadIterator tfit) throws IOException { + public void build(InputIterator tfit) throws IOException { if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } root = new TernaryTreeNode(); // make sure it's sorted and the comparator uses UTF16 sort order - tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); + tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); ArrayList tokens = new ArrayList(); ArrayList vals = new ArrayList(); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java index efdd09c..85418ff 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java @@ -19,7 +19,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.DocumentDictionary; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -78,7 +77,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); - TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator(); + InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = tfp.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -105,7 
+104,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { writer.close(); IndexReader ir = DirectoryReader.open(dir); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator(); + InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = tfp.next())!=null) { Document doc = docs.remove(f.utf8ToString()); @@ -153,7 +152,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { IndexReader ir = DirectoryReader.open(dir); assertEquals(ir.numDocs(), docs.size()); Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME); - TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator(); + InputIterator tfp = (InputIterator) dictionary.getWordsIterator(); BytesRef f; while((f = tfp.next())!=null) { Document doc = docs.remove(f.utf8ToString()); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java new file mode 100644 index 0000000..009f80c --- /dev/null +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java @@ -0,0 +1,55 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.BytesRef; + +/** corresponds to {@link InputIterator}'s entries */ +public final class Input { + public final BytesRef term; + public final long v; + public final BytesRef payload; + public final boolean hasPayloads; + + public Input(BytesRef term, long v, BytesRef payload) { + this(term, v, payload, true); + } + + public Input(String term, long v, BytesRef payload) { + this(new BytesRef(term), v, payload, true); + } + + public Input(BytesRef term, long v) { + this(term, v, null, false); + } + + public Input(String term, long v) { + this(new BytesRef(term), v, null, false); + } + + public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads) { + this.term = term; + this.v = v; + this.payload = payload; + this.hasPayloads = hasPayloads; + } + + public boolean hasPayloads() { + return hasPayloads; + } +} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java new file mode 100644 index 0000000..edebb37 --- /dev/null +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java @@ -0,0 +1,81 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; +import java.util.Iterator; + +import org.apache.lucene.util.BytesRef; + +/** + * A {@link InputIterator} over a sequence of {@link Input}s. + */ +public final class InputArrayIterator implements InputIterator { + private final Iterator i; + private final boolean hasPayloads; + private boolean first; + private Input current; + private final BytesRef spare = new BytesRef(); + + public InputArrayIterator(Iterator i) { + this.i = i; + if (i.hasNext()) { + current = i.next(); + first = true; + this.hasPayloads = current.hasPayloads; + } else { + this.hasPayloads = false; + } + } + + public InputArrayIterator(Input[] i) { + this(Arrays.asList(i)); + } + public InputArrayIterator(Iterable i) { + this(i.iterator()); + } + + @Override + public long weight() { + return current.v; + } + + @Override + public BytesRef next() { + if (i.hasNext() || (first && current!=null)) { + if (first) { + first = false; + } else { + current = i.next(); + } + spare.copyBytes(current.term); + return spare; + } + return null; + } + + @Override + public BytesRef payload() { + return current.payload; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } +} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index f57d5d3..7858a23 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ 
lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Input term/weight pairs. */ - private static TermFreqPayload [] dictionaryInput; + private static Input [] dictionaryInput; /** * Benchmark term/weight pairs (randomized order). */ - private static List benchmarkInput; + private static List benchmarkInput; /** * Loads terms and frequencies from Wikipedia (cached). @@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase { @BeforeClass public static void setup() throws Exception { assert false : "disable assertions before running benchmarks!"; - List input = readTop50KWiki(); + List input = readTop50KWiki(); Collections.shuffle(input, random); - LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]); + LookupBenchmarkTest.dictionaryInput = input.toArray(new Input [input.size()]); Collections.shuffle(input, random); LookupBenchmarkTest.benchmarkInput = input; } @@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Collect the multilingual input for benchmarks/ tests. */ - public static List readTop50KWiki() throws Exception { - List input = new ArrayList(); + public static List readTop50KWiki() throws Exception { + List input = new ArrayList(); URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8"); assert resource != null : "Resource missing: Top50KWiki.utf8"; @@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { assertTrue("No | separator?: " + line, tab >= 0); int weight = Integer.parseInt(line.substring(tab + 1)); String key = line.substring(0, tab); - input.add(new TermFreqPayload(key, weight)); + input.add(new Input(key, weight)); } br.close(); return input; @@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Create {@link Lookup} instance and populate it. 
*/ - private Lookup buildLookup(Class cls, TermFreqPayload[] input) throws Exception { + private Lookup buildLookup(Class cls, Input[] input) throws Exception { Lookup lookup = null; try { lookup = cls.newInstance(); @@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { lookup = ctor.newInstance(a); } } - lookup.build(new TermFreqPayloadArrayIterator(input)); + lookup.build(new InputArrayIterator(input)); return lookup; } @@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { final Lookup lookup = buildLookup(cls, dictionaryInput); final List input = new ArrayList(benchmarkInput.size()); - for (TermFreqPayload tf : benchmarkInput) { + for (Input tf : benchmarkInput) { String s = tf.term.utf8ToString(); String sub = s.substring(0, Math.min(s.length(), minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java index 2439857..f98f90f 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java @@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase { // Add all input keys. Lookup lookup = lookupClass.newInstance(); - TermFreqPayload[] keys = new TermFreqPayload[this.keys.length]; + Input[] keys = new Input[this.keys.length]; for (int i = 0; i < keys.length; i++) - keys[i] = new TermFreqPayload(this.keys[i], i); - lookup.build(new TermFreqPayloadArrayIterator(keys)); + keys[i] = new Input(this.keys[i], i); + lookup.build(new InputArrayIterator(keys)); // Store the suggester. File storeDir = TEMP_DIR; @@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase { // Assert validity. 
Random random = random(); long previous = Long.MIN_VALUE; - for (TermFreqPayload k : keys) { + for (Input k : keys) { List list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1); assertEquals(1, list.size()); LookupResult lookupResult = list.get(0); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java deleted file mode 100644 index 5463a13..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java +++ /dev/null @@ -1,54 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import org.apache.lucene.util.BytesRef; - -public final class TermFreqPayload { - public final BytesRef term; - public final long v; - public final BytesRef payload; - public final boolean hasPayloads; - - public TermFreqPayload(BytesRef term, long v, BytesRef payload) { - this(term, v, payload, true); - } - - public TermFreqPayload(String term, long v, BytesRef payload) { - this(new BytesRef(term), v, payload, true); - } - - public TermFreqPayload(BytesRef term, long v) { - this(term, v, null, false); - } - - public TermFreqPayload(String term, long v) { - this(new BytesRef(term), v, null, false); - } - - public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) { - this.term = term; - this.v = v; - this.payload = payload; - this.hasPayloads = hasPayloads; - } - - public boolean hasPayloads() { - return hasPayloads; - } -} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java deleted file mode 100644 index 6583f73..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java +++ /dev/null @@ -1,82 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Arrays; -import java.util.Iterator; - -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.util.BytesRef; - -/** - * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s. - */ -public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator { - private final Iterator i; - private final boolean hasPayloads; - private boolean first; - private TermFreqPayload current; - private final BytesRef spare = new BytesRef(); - - public TermFreqPayloadArrayIterator(Iterator i) { - this.i = i; - if (i.hasNext()) { - current = i.next(); - first = true; - this.hasPayloads = current.hasPayloads; - } else { - this.hasPayloads = false; - } - } - - public TermFreqPayloadArrayIterator(TermFreqPayload[] i) { - this(Arrays.asList(i)); - } - public TermFreqPayloadArrayIterator(Iterable i) { - this(i.iterator()); - } - - @Override - public long weight() { - return current.v; - } - - @Override - public BytesRef next() { - if (i.hasNext() || (first && current!=null)) { - if (first) { - first = false; - } else { - current = i.next(); - } - spare.copyBytes(current.term); - return spare; - } - return null; - } - - @Override - public BytesRef payload() { - return current.payload; - } - - @Override - public boolean hasPayloads() { - return hasPayloads; - } -} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java new file mode 100644 
index 0000000..b0c423d --- /dev/null +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java @@ -0,0 +1,123 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.util.AbstractMap.SimpleEntry; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; + +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestInputIterator extends LuceneTestCase { + + public void testEmpty() throws Exception { + InputArrayIterator iterator = new InputArrayIterator(new Input[0]); + InputIterator wrapper = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); + assertNull(wrapper.next()); + wrapper = new UnsortedInputIterator(iterator); + assertNull(wrapper.next()); + } + + public void testTerms() throws Exception { + Random random = random(); + int num = atLeast(10000); + + Comparator comparator = random.nextBoolean() ? 
BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); + TreeMap> sorted = new TreeMap<>(comparator); + TreeMap sortedWithoutPayload = new TreeMap<>(comparator); + Input[] unsorted = new Input[num]; + Input[] unsortedWithoutPayload = new Input[num]; + + for (int i = 0; i < num; i++) { + BytesRef key; + BytesRef payload; + do { + key = new BytesRef(_TestUtil.randomUnicodeString(random)); + payload = new BytesRef(_TestUtil.randomUnicodeString(random)); + } while (sorted.containsKey(key)); + long value = random.nextLong(); + sortedWithoutPayload.put(key, value); + sorted.put(key, new SimpleEntry<>(value, payload)); + unsorted[i] = new Input(key, value, payload); + unsortedWithoutPayload[i] = new Input(key, value); + } + + // test the sorted iterator wrapper with payloads + InputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparator); + Iterator>> expected = sorted.entrySet().iterator(); + while (expected.hasNext()) { + Map.Entry> entry = expected.next(); + + assertEquals(entry.getKey(), wrapper.next()); + assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); + assertEquals(entry.getValue().getValue(), wrapper.payload()); + } + assertNull(wrapper.next()); + + // test the unsorted iterator wrapper with payloads + wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted)); + TreeMap> actual = new TreeMap<>(); + BytesRef key; + while ((key = wrapper.next()) != null) { + long value = wrapper.weight(); + BytesRef payload = wrapper.payload(); + actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload))); + } + assertEquals(sorted, actual); + + // test the sorted iterator wrapper without payloads + InputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparator); + Iterator> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator(); + while (expectedWithoutPayload.hasNext()) { + 
Map.Entry entry = expectedWithoutPayload.next(); + + assertEquals(entry.getKey(), wrapperWithoutPayload.next()); + assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight()); + assertNull(wrapperWithoutPayload.payload()); + } + assertNull(wrapperWithoutPayload.next()); + + // test the unsorted iterator wrapper without payloads + wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload)); + TreeMap actualWithoutPayload = new TreeMap<>(); + while ((key = wrapperWithoutPayload.next()) != null) { + long value = wrapperWithoutPayload.weight(); + assertNull(wrapperWithoutPayload.payload()); + actualWithoutPayload.put(BytesRef.deepCopyOf(key), value); + } + assertEquals(sortedWithoutPayload, actualWithoutPayload); + } + + public static long asLong(BytesRef b) { + return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, + b.offset + 4) & 0xFFFFFFFFL); + } + + private static int asIntInternal(BytesRef b, int pos) { + return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) + | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); + } +} diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java deleted file mode 100644 index e7d8257..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ - -import java.util.AbstractMap.SimpleEntry; -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.Random; -import java.util.TreeMap; - -import org.apache.lucene.search.spell.TermFreqPayloadIterator; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -public class TestTermFreqPayloadIterator extends LuceneTestCase { - - public void testEmpty() throws Exception { - TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]); - TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); - assertNull(wrapper.next()); - wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator); - assertNull(wrapper.next()); - } - - public void testTerms() throws Exception { - Random random = random(); - int num = atLeast(10000); - - Comparator comparator = random.nextBoolean() ? 
BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); - TreeMap> sorted = new TreeMap<>(comparator); - TreeMap sortedWithoutPayload = new TreeMap<>(comparator); - TermFreqPayload[] unsorted = new TermFreqPayload[num]; - TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num]; - - for (int i = 0; i < num; i++) { - BytesRef key; - BytesRef payload; - do { - key = new BytesRef(_TestUtil.randomUnicodeString(random)); - payload = new BytesRef(_TestUtil.randomUnicodeString(random)); - } while (sorted.containsKey(key)); - long value = random.nextLong(); - sortedWithoutPayload.put(key, value); - sorted.put(key, new SimpleEntry<>(value, payload)); - unsorted[i] = new TermFreqPayload(key, value, payload); - unsortedWithoutPayload[i] = new TermFreqPayload(key, value); - } - - // test the sorted iterator wrapper with payloads - TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator); - Iterator>> expected = sorted.entrySet().iterator(); - while (expected.hasNext()) { - Map.Entry> entry = expected.next(); - - assertEquals(entry.getKey(), wrapper.next()); - assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); - assertEquals(entry.getValue().getValue(), wrapper.payload()); - } - assertNull(wrapper.next()); - - // test the unsorted iterator wrapper with payloads - wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted)); - TreeMap> actual = new TreeMap<>(); - BytesRef key; - while ((key = wrapper.next()) != null) { - long value = wrapper.weight(); - BytesRef payload = wrapper.payload(); - actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload))); - } - assertEquals(sorted, actual); - - // test the sorted iterator wrapper without payloads - TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new 
TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator); - Iterator> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator(); - while (expectedWithoutPayload.hasNext()) { - Map.Entry entry = expectedWithoutPayload.next(); - - assertEquals(entry.getKey(), wrapperWithoutPayload.next()); - assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight()); - assertNull(wrapperWithoutPayload.payload()); - } - assertNull(wrapperWithoutPayload.next()); - - // test the unsorted iterator wrapper without payloads - wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload)); - TreeMap actualWithoutPayload = new TreeMap<>(); - while ((key = wrapperWithoutPayload.next()) != null) { - long value = wrapperWithoutPayload.weight(); - assertNull(wrapperWithoutPayload.payload()); - actualWithoutPayload.put(BytesRef.deepCopyOf(key), value); - } - assertEquals(sortedWithoutPayload, actualWithoutPayload); - } - - public static long asLong(BytesRef b) { - return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, - b.offset + 4) & 0xFFFFFFFFL); - } - - private static int asIntInternal(BytesRef b, int pos) { - return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) - | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); - } -} diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 84c6227..8113721 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -34,8 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import 
org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreqPayload; -import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; +import org.apache.lucene.search.suggest.Input; +import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; @@ -47,9 +47,9 @@ import org.apache.lucene.util._TestUtil; public class AnalyzingInfixSuggesterTest extends LuceneTestCase { public void testBasic() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")), - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("lend me your ear", 8, new BytesRef("foobar")), + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -61,7 +61,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newDirectory(); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true); assertEquals(2, results.size()); @@ -95,9 +95,9 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testAfterLoad() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")), - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("lend me your ear", 8, new BytesRef("foobar")), + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = 
_TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -109,7 +109,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newFSDirectory(path); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); suggester.close(); suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { @@ -150,8 +150,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { @SuppressWarnings("unchecked") public void testHighlightAsObject() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -204,7 +204,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true); assertEquals(1, results.size()); @@ -230,9 +230,9 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testRandomMinPrefixLength() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")), - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("lend me your ear", 8, new BytesRef("foobar")), + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -245,7 +245,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newFSDirectory(path); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new 
InputArrayIterator(keys)); for(int i=0;i<2;i++) { for(int j=0;j<2;j++) { @@ -312,8 +312,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testHighlight() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -325,7 +325,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newDirectory(); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a penny saved is a penny earned", results.get(0).key); @@ -333,8 +333,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testHighlightCaseChange() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("a Penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -346,7 +346,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newDirectory(); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a Penny saved is a penny earned", results.get(0).key); @@ -367,7 +367,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newDirectory(); } }; - suggester.build(new 
TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a Penny saved is a penny earned", results.get(0).key); @@ -375,8 +375,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } public void testDoubleClose() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")), }; File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest"); @@ -388,7 +388,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { return newDirectory(); } }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); suggester.close(); suggester.close(); } @@ -422,11 +422,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { } }; - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("a bob for apples", 10, new BytesRef("foobaz")), + Input keys[] = new Input[] { + new Input("a bob for apples", 10, new BytesRef("foobaz")), }; - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("a", random()), 10, true, true); assertEquals(1, results.size()); assertEquals("a bob for apples", results.get(0).key); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java index f367f36..eca1d26 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java +++ 
lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java @@ -52,8 +52,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreqPayload; -import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; +import org.apache.lucene.search.suggest.Input; +import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; @@ -63,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */ public void testKeyword() throws Exception { - Iterable keys = shuffle( - new TermFreqPayload("foo", 50), - new TermFreqPayload("bar", 10), - new TermFreqPayload("barbar", 10), - new TermFreqPayload("barbar", 12), - new TermFreqPayload("barbara", 6), - new TermFreqPayload("bar", 5), - new TermFreqPayload("barbara", 1) + Iterable keys = shuffle( + new Input("foo", 50), + new Input("bar", 10), + new Input("barbar", 10), + new Input("barbar", 12), + new Input("barbara", 6), + new Input("bar", 5), + new Input("barbara", 1) ); AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2); @@ -109,16 +109,16 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } public void testKeywordWithPayloads() throws Exception { - Iterable keys = shuffle( - new TermFreqPayload("foo", 50, new 
BytesRef("hello")), - new TermFreqPayload("bar", 10, new BytesRef("goodbye")), - new TermFreqPayload("barbar", 12, new BytesRef("thank you")), - new TermFreqPayload("bar", 9, new BytesRef("should be deduplicated")), - new TermFreqPayload("bar", 8, new BytesRef("should also be deduplicated")), - new TermFreqPayload("barbara", 6, new BytesRef("for all the fish"))); + Iterable keys = shuffle( + new Input("foo", 50, new BytesRef("hello")), + new Input("bar", 10, new BytesRef("goodbye")), + new Input("barbar", 12, new BytesRef("thank you")), + new Input("bar", 9, new BytesRef("should be deduplicated")), + new Input("bar", 8, new BytesRef("should also be deduplicated")), + new Input("barbara", 6, new BytesRef("for all the fish"))); AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); for (int i = 0; i < 2; i++) { // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2); @@ -163,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testRandomRealisticKeys() throws IOException { LineFileDocs lineFile = new LineFileDocs(random()); Map mapping = new HashMap<>(); - List keys = new ArrayList<>(); + List keys = new ArrayList<>(); int howMany = atLeast(100); // this might bring up duplicates for (int i = 0; i < howMany; i++) { Document nextDoc = lineFile.nextDoc(); String title = nextDoc.getField("title").stringValue(); int randomWeight = random().nextInt(100); - keys.add(new TermFreqPayload(title, randomWeight)); + keys.add(new Input(title, randomWeight)); if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) { mapping.put(title, Long.valueOf(randomWeight)); } @@ -180,16 +180,16 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { 
analyzingSuggester.setPreservePositionIncrements(random().nextBoolean()); boolean doPayloads = random().nextBoolean(); if (doPayloads) { - List keysAndPayloads = new ArrayList<>(); - for (TermFreqPayload termFreq : keys) { - keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v)))); + List keysAndPayloads = new ArrayList<>(); + for (Input termFreq : keys) { + keysAndPayloads.add(new Input(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v)))); } - analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads)); + analyzingSuggester.build(new InputArrayIterator(keysAndPayloads)); } else { - analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys)); + analyzingSuggester.build(new InputArrayIterator(keys)); } - for (TermFreqPayload termFreq : keys) { + for (Input termFreq : keys) { List lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size()); for (LookupResult lookupResult : lookup) { assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value)); @@ -209,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("the ghost of christmas past", 50), + Input keys[] = new Input[] { + new Input("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -239,23 +239,23 @@ public class AnalyzingSuggesterTest 
extends LuceneTestCase { public void testEmpty() throws Exception { Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); + suggester.build(new InputArrayIterator(new Input[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); } public void testNoSeps() throws Exception { - TermFreqPayload[] keys = new TermFreqPayload[] { - new TermFreqPayload("ab cd", 0), - new TermFreqPayload("abcd", 1), + Input[] keys = new Input[] { + new Input("ab cd", 0), + new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -316,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } }; - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("wifi network is slow", 50), - new TermFreqPayload("wi fi network is fast", 10), + Input keys[] = new Input[] { + new Input("wifi network is slow", 50), + new Input("wi fi network is fast", 10), }; //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1); AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { System.out.println("Results: " + results); @@ -382,12 +382,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } }; - TermFreqPayload keys[] = new 
TermFreqPayload[] { - new TermFreqPayload("ab xc", 50), - new TermFreqPayload("ba xd", 50), + Input keys[] = new Input[] { + new Input("ab xc", 50), + new Input("ba xd", 50), }; AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -460,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 1), - new TermFreqPayload("x y z", 3), - new TermFreqPayload("x", 2), - new TermFreqPayload("z z z", 20), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 1), + new Input("x y z", 3), + new Input("x", 2), + new Input("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -500,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 1), - new TermFreqPayload("x y z", 3), - new TermFreqPayload("x", 2), - new TermFreqPayload("z z z", 20), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 1), + new Input("x y z", 3), + new Input("x", 2), + new Input("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -655,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { boolean doPayloads = random().nextBoolean(); - TermFreqPayload[] keys = null; - TermFreqPayload[] payloadKeys = null; + Input[] keys = null; + Input[] payloadKeys = null; if 
(doPayloads) { - payloadKeys = new TermFreqPayload[numQueries]; + payloadKeys = new Input[numQueries]; } else { - keys = new TermFreqPayload[numQueries]; + keys = new Input[numQueries]; } boolean preserveSep = random().nextBoolean(); @@ -731,9 +731,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { byte[] bytes = new byte[random().nextInt(10)]; random().nextBytes(bytes); payload = new BytesRef(bytes); - payloadKeys[i] = new TermFreqPayload(key, weight, payload); + payloadKeys[i] = new Input(key, weight, payload); } else { - keys[i] = new TermFreqPayload(key, weight); + keys[i] = new Input(key, weight); payload = null; } @@ -754,9 +754,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1); if (doPayloads) { - suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys))); + suggester.build(new InputArrayIterator(shuffle(payloadKeys))); } else { - suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys))); + suggester.build(new InputArrayIterator(shuffle(keys))); } for (String prefix : allPrefixes) { @@ -874,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1); - suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40), - new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60)))); + suggester.build(new InputArrayIterator(shuffle(new Input("a", 40), + new Input("a ", 50), new Input(" a", 60)))); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -889,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 
AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("a", 2), - new TermFreqPayload("a b c", 3), - new TermFreqPayload("a c a", 1), - new TermFreqPayload("a c b", 1), + suggester.build(new InputArrayIterator(new Input[] { + new Input("a", 2), + new Input("a b c", 3), + new Input("a c a", 1), + new Input("a c b", 1), })); suggester.lookup("a", false, 4); @@ -905,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("a", 5), - new TermFreqPayload("a b", 3), - new TermFreqPayload("a c", 4), + suggester.build(new InputArrayIterator(new Input[] { + new Input("a", 5), + new Input("a b", 3), + new Input("a c", 4), })); List results = suggester.lookup("a", false, 3); @@ -970,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(shuffle( - new TermFreqPayload("hambone", 6), - new TermFreqPayload("nellie", 5)))); + suggester.build(new InputArrayIterator(shuffle( + new Input("hambone", 6), + new Input("nellie", 5)))); List results = suggester.lookup("nellie", false, 2); assertEquals(2, results.size()); @@ -1039,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("a", 6), - new TermFreqPayload("b", 5), + suggester.build(new InputArrayIterator(new Input[] { + new Input("a", 6), + new Input("b", 5), })); List results = suggester.lookup("a", false, 2); @@ -1112,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { 
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("a a", 50), - new TermFreqPayload("a b", 50), + suggester.build(new InputArrayIterator(new Input[] { + new Input("a a", 50), + new Input("a b", 50), })); } public void testDupSurfaceFormsMissingResults3() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("a a", 7), - new TermFreqPayload("a a", 7), - new TermFreqPayload("a c", 6), - new TermFreqPayload("a c", 3), - new TermFreqPayload("a b", 5), + suggester.build(new InputArrayIterator(new Input[] { + new Input("a a", 7), + new Input("a a", 7), + new Input("a c", 6), + new Input("a c", 3), + new Input("a b", 5), })); assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString()); } @@ -1134,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testEndingSpace() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("i love lucy", 7), - new TermFreqPayload("isla de muerta", 8), + suggester.build(new InputArrayIterator(new Input[] { + new Input("i love lucy", 7), + new Input("isla de muerta", 8), })); assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString()); assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString()); @@ -1167,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { }; AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1); - suggester.build(new TermFreqPayloadArrayIterator(new 
TermFreqPayload[] {new TermFreqPayload("a", 1)})); + suggester.build(new InputArrayIterator(new Input[] {new Input("a", 1)})); assertEquals("[a/1]", suggester.lookup("a", false, 1).toString()); } public void testIllegalLookupArgument() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("а где Люси?", 7), + suggester.build(new InputArrayIterator(new Input[] { + new Input("а где Люси?", 7), })); try { suggester.lookup("а\u001E", false, 3); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java index e65f2bc..3d24856 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java @@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreqPayload; -import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; +import org.apache.lucene.search.suggest.Input; +import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; @@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util; public class FuzzySuggesterTest extends LuceneTestCase { public void testRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new 
TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new Input("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); } - keys.add(new TermFreqPayload("foo bar boo far", 12)); + keys.add(new Input("foo bar boo far", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; i < numIters; i++) { String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX); @@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNonLatinRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new Input("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); } - keys.add(new TermFreqPayload("фуу бар буу фар", 12)); + keys.add(new Input("фуу бар буу фар", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; 
i < numIters; i++) { String addRandomEdit = addRandomEdit("фуу бар буу", 0); @@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */ public void testKeyword() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("foo", 50), - new TermFreqPayload("bar", 10), - new TermFreqPayload("barbar", 12), - new TermFreqPayload("barbara", 6) + Input keys[] = new Input[] { + new Input("foo", 50), + new Input("bar", 10), + new Input("barbar", 12), + new Input("barbara", 6) }; FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2); assertEquals(2, results.size()); @@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("the ghost of christmas past", 50), + Input keys[] = new Input[] { + new Input("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); FuzzySuggester suggester = new FuzzySuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNoSeps() throws Exception { - TermFreqPayload[] keys = new TermFreqPayload[] { - new TermFreqPayload("ab cd", 
0), - new TermFreqPayload("abcd", 1), + Input[] keys = new Input[] { + new Input("ab cd", 0), + new Input("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("wifi network is slow", 50), - new TermFreqPayload("wi fi network is fast", 10), + Input keys[] = new Input[] { + new Input("wifi network is slow", 50), + new Input("wi fi network is fast", 10), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { @@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testEmpty() throws Exception { FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); + suggester.build(new InputArrayIterator(new Input[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); @@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("ab xc", 50), - new TermFreqPayload("ba xd", 50), + Input keys[] = new Input[] { + new Input("ab xc", 50), + new Input("ba xd", 50), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + 
suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 1), - new TermFreqPayload("x y z", 3), - new TermFreqPayload("x", 2), - new TermFreqPayload("z z z", 20), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 1), + new Input("x y z", 3), + new Input("x", 2), + new Input("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 1), - new TermFreqPayload("x y z", 3), - new TermFreqPayload("x", 2), - new TermFreqPayload("z z z", 20), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 1), + new Input("x y z", 3), + new Input("x", 2), + new Input("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -600,7 +600,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { final TreeSet allPrefixes = new TreeSet(); final Set seen = new HashSet(); - TermFreqPayload[] keys = new TermFreqPayload[numQueries]; + Input[] keys = new Input[numQueries]; boolean preserveSep = random().nextBoolean(); boolean unicodeAware = random().nextBoolean(); @@ -666,7 +666,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { } // we can probably do Integer.MAX_VALUE here, but why worry. 
int weight = random().nextInt(1<<24); - keys[i] = new TermFreqPayload(key, weight); + keys[i] = new Input(key, weight); slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight)); } @@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); FuzzySuggester suggester = new FuzzySuggester(a, a, preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); for (String prefix : allPrefixes) { @@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false); - List keys = Arrays.asList(new TermFreqPayload[] { - new TermFreqPayload("a", 40), - new TermFreqPayload("a ", 50), - new TermFreqPayload(" a", 60), + List keys = Arrays.asList(new Input[] { + new Input("a", 40), + new Input("a ", 50), + new Input(" a", 60), }); Collections.shuffle(keys, random()); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false); - List keys = Arrays.asList(new TermFreqPayload[] { - new TermFreqPayload("foo bar", 40), - new TermFreqPayload("foo bar baz", 50), - new TermFreqPayload("barbaz", 60), - new TermFreqPayload("barbazfoo", 10), + List keys = Arrays.asList(new Input[] { + new Input("foo bar", 40), + new Input("foo bar baz", 50), + new Input("barbaz", 60), + new Input("barbazfoo", 10), }); Collections.shuffle(keys, random()); - suggester.build(new 
TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString()); assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString()); @@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testRandom2() throws Throwable { final int NUM = atLeast(200); - final List answers = new ArrayList(); + final List answers = new ArrayList(); final Set seen = new HashSet(); for(int i=0;i() { + Collections.sort(answers, new Comparator() { @Override - public int compare(TermFreqPayload a, TermFreqPayload b) { + public int compare(Input a, Input b) { return a.term.compareTo(b.term); } }); if (VERBOSE) { System.out.println("\nTEST: targets"); - for(TermFreqPayload tf : answers) { + for(Input tf : answers) { System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v); } } @@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { } Collections.shuffle(answers, random()); - suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()]))); + suggest.build(new InputArrayIterator(answers.toArray(new Input[answers.size()]))); final int ITERS = atLeast(100); for(int iter=0;iter slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { + private List slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { final List results = new ArrayList(); final int fragLen = frag.length(); - for(TermFreqPayload tf : answers) { + for(Input tf : answers) { //System.out.println(" check s=" + tf.term.utf8ToString()); boolean prefixMatches = true; for(int i=0;i keys = shuffle( - new TermFreqPayload("foo bar baz blah", 50), - new TermFreqPayload("boo foo bar foo bee", 20) + Iterable keys = shuffle( + new Input("foo bar baz blah", 50), + new Input("boo foo bar foo bee", 20) ); Analyzer a = new 
MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); for(int i=0;i<2;i++) { @@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringBuild() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreqPayload("foo\u001ebar baz", 50) + Iterable keys = shuffle( + new Input("foo\u001ebar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); try { - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); fail("did not hit expected exception"); } catch (IllegalArgumentException iae) { // expected @@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringQuery() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreqPayload("foo bar baz", 50) + Iterable keys = shuffle( + new Input("foo bar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); try { sug.lookup("foo\u001eb", 10); @@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Skip header: lfd.nextDoc(); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqPayloadIterator() { + sug.build(new InputIterator() { private int count; @@ -185,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Make sure you can suggest based only on unigram model: public void testUnigrams() throws Exception { - Iterable keys = shuffle( - new TermFreqPayload("foo bar baz blah boo foo bar 
foo bee", 50) + Iterable keys = shuffle( + new Input("foo bar baz blah boo foo bar foo bee", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); // Sorts first by count, descending, second by term, ascending assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11", toString(sug.lookup("b", 10))); @@ -199,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Make sure the last token is not duplicated public void testNoDupsAcrossGrams() throws Exception { - Iterable keys = shuffle( - new TermFreqPayload("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new Input("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); assertEquals("foo bar/1.00", toString(sug.lookup("foo b", 10))); } // Lookup of just empty string produces unicode only matches: public void testEmptyString() throws Exception { - Iterable keys = shuffle( - new TermFreqPayload("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new Input("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); try { sug.lookup("", 10); fail("did not hit exception"); @@ -238,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreqPayload("wizard of oz", 50) + Iterable keys = shuffle( + new Input("wizard of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); assertEquals("wizard _ oz/1.00", 
toString(sug.lookup("wizard of", 10))); @@ -266,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreqPayload("wizard of of oz", 50) + Iterable keys = shuffle( + new Input("wizard of of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqPayloadArrayIterator(keys)); + sug.build(new InputArrayIterator(keys)); assertEquals("", toString(sug.lookup("wizard of of", 10))); } @@ -330,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Build suggester model: FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20); - sug.build(new TermFreqPayloadIterator() { + sug.build(new InputIterator() { int upto; @Override diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index 42594e7..de12673 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -28,8 +28,8 @@ import org.apache.lucene.util.*; * Unit tests for {@link FSTCompletion}. 
*/ public class FSTCompletionTest extends LuceneTestCase { - public static TermFreqPayload tf(String t, int v) { - return new TermFreqPayload(t, v); + public static Input tf(String t, int v) { + return new Input(t, v); } private FSTCompletion completion; @@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase { super.setUp(); FSTCompletionBuilder builder = new FSTCompletionBuilder(); - for (TermFreqPayload tf : evalKeys()) { + for (Input tf : evalKeys()) { builder.add(tf.term, (int) tf.v); } completion = builder.build(); completionAlphabetical = new FSTCompletion(completion.getFST(), false, true); } - private TermFreqPayload[] evalKeys() { - final TermFreqPayload[] keys = new TermFreqPayload[] { + private Input[] evalKeys() { + final Input[] keys = new Input[] { tf("one", 0), tf("oneness", 1), tf("onerous", 1), @@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(10, true); Random r = random(); - List keys = new ArrayList(); + List keys = new ArrayList(); for (int i = 0; i < 5000; i++) { - keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1)); + keys.add(new Input(_TestUtil.randomSimpleString(r), -1)); } - lookup.build(new TermFreqPayloadArrayIterator(keys)); + lookup.build(new InputArrayIterator(keys)); // All the weights were constant, so all returned buckets must be constant, whatever they // are. 
Long previous = null; - for (TermFreqPayload tf : keys) { + for (Input tf : keys) { Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue(); if (previous != null) { assertEquals(previous, current); @@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testMultilingualInput() throws Exception { - List input = LookupBenchmarkTest.readTop50KWiki(); + List input = LookupBenchmarkTest.readTop50KWiki(); FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqPayloadArrayIterator(input)); - for (TermFreqPayload tf : input) { + lookup.build(new InputArrayIterator(input)); + for (Input tf : input) { assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString()); } @@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testRandom() throws Exception { - List freqs = new ArrayList(); + List freqs = new ArrayList(); Random rnd = random(); for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { int weight = rnd.nextInt(100); - freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight)); + freqs.add(new Input("" + rnd.nextLong(), weight)); } FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()]))); + lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()]))); - for (TermFreqPayload tf : freqs) { + for (Input tf : freqs) { final String term = tf.term.utf8ToString(); for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java index 2fe2c7a..56cc62b 
100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst; import java.util.*; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreqPayload; -import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; +import org.apache.lucene.search.suggest.Input; +import org.apache.lucene.search.suggest.InputArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil; public class WFSTCompletionTest extends LuceneTestCase { public void testBasic() throws Exception { - TermFreqPayload keys[] = new TermFreqPayload[] { - new TermFreqPayload("foo", 50), - new TermFreqPayload("bar", 10), - new TermFreqPayload("barbar", 12), - new TermFreqPayload("barbara", 6) + Input keys[] = new Input[] { + new Input("foo", 50), + new Input("bar", 10), + new Input("barbar", 12), + new Input("barbara", 6) }; Random random = new Random(random().nextLong()); WFSTCompletionLookup suggester = new WFSTCompletionLookup(); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2); @@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(true); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 20), - new TermFreqPayload("x", 2), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 20), + new Input("x", 2), })); for(int topN=1;topN<4;topN++) { @@ -105,9 +105,9 @@ public class WFSTCompletionTest 
extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload("x y", 20), - new TermFreqPayload("x", 2), + suggester.build(new InputArrayIterator(new Input[] { + new Input("x y", 20), + new Input("x", 2), })); for(int topN=1;topN<4;topN++) { @@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase { final TreeMap slowCompletor = new TreeMap(); final TreeSet allPrefixes = new TreeSet(); - TermFreqPayload[] keys = new TermFreqPayload[numWords]; + Input[] keys = new Input[numWords]; for (int i = 0; i < numWords; i++) { String s; @@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase { // we can probably do Integer.MAX_VALUE here, but why worry. int weight = random().nextInt(1<<24); slowCompletor.put(s, (long)weight); - keys[i] = new TermFreqPayload(s, weight); + keys[i] = new Input(s, weight); } WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqPayloadArrayIterator(keys)); + suggester.build(new InputArrayIterator(keys)); Random random = new Random(random().nextLong()); for (String prefix : allPrefixes) { @@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { - new TermFreqPayload(key1, 50), - new TermFreqPayload(key2, 50), + suggester.build(new InputArrayIterator(new Input[] { + new Input(key1, 50), + new Input(key2, 50), })); } public void testEmpty() throws Exception { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); + suggester.build(new InputArrayIterator(new Input[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); }