diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java index 187e327..5882fdf 100644 --- lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java +++ lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java @@ -59,7 +59,7 @@ public class HighFrequencyDictionary implements Dictionary { return new HighFrequencyIterator(); } - final class HighFrequencyIterator implements TermFreqIterator { + final class HighFrequencyIterator implements TermFreqPayloadIterator { private final BytesRef spare = new BytesRef(); private final TermsEnum termsEnum; private int minNumDocs; @@ -98,5 +98,15 @@ public class HighFrequencyDictionary implements Dictionary { } return null; } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } } } diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java deleted file mode 100644 index d7ce627..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.apache.lucene.search.spell; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefIterator; - -/** - * Interface for enumerating term,weight pairs. - */ -public interface TermFreqIterator extends BytesRefIterator { - - /** A term's weight, higher numbers mean better suggestions. */ - public long weight(); - - /** - * Wraps a BytesRefIterator as a TermFreqIterator, with all weights - * set to 1 - */ - public static class TermFreqIteratorWrapper implements TermFreqIterator { - private BytesRefIterator wrapped; - - /** - * Creates a new wrapper, wrapping the specified iterator and - * specifying a weight value of 1 for all terms. - */ - public TermFreqIteratorWrapper(BytesRefIterator wrapped) { - this.wrapped = wrapped; - } - - @Override - public long weight() { - return 1; - } - - @Override - public BytesRef next() throws IOException { - return wrapped.next(); - } - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java index 5d3a59b..e780db4 100644 --- lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java +++ lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java @@ -17,20 +17,67 @@ package org.apache.lucene.search.spell; * limitations under the License. 
*/ +import java.io.IOException; + import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs +import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; /** * Interface for enumerating term,weight,payload triples; - * currently only {@link AnalyzingSuggester} and {@link - * FuzzySuggester} support payloads. + * currently only {@link AnalyzingSuggester}, {@link + * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads. */ -public interface TermFreqPayloadIterator extends TermFreqIterator { +public interface TermFreqPayloadIterator extends BytesRefIterator { + /** A term's weight, higher numbers mean better suggestions. */ + public long weight(); + /** An arbitrary byte[] to record per suggestion. See * {@link LookupResult#payload} to retrieve the payload * for each suggestion. */ public BytesRef payload(); + + /** Returns true if the iterator has payloads */ + public boolean hasPayloads(); + + /** + * Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights + * set to 1 and carries no payload + */ + public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { + private final BytesRefIterator wrapped; + + /** + * Creates a new wrapper, wrapping the specified iterator and + * specifying a weight value of 1 for all terms + * and nullifies associated payloads. + */ + public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) { + this.wrapped = wrapped; + } + + @Override + public long weight() { + return 1; + } + + @Override + public BytesRef next() throws IOException { + return wrapped.next(); + } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } + } } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java deleted file mode 100644 index 6228667..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.Counter; - -/** - * This wrapper buffers incoming elements. 
- * @lucene.experimental - */ -public class BufferingTermFreqIteratorWrapper implements TermFreqIterator { - // TODO keep this for now - /** buffered term entries */ - protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); - /** current buffer position */ - protected int curPos = -1; - /** buffered weights, parallel with {@link #entries} */ - protected long[] freqs = new long[1]; - private final BytesRef spare = new BytesRef(); - - /** Creates a new iterator, buffering entries from the specified iterator */ - public BufferingTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { - BytesRef spare; - int freqIndex = 0; - while((spare = source.next()) != null) { - entries.append(spare); - if (freqIndex >= freqs.length) { - freqs = ArrayUtil.grow(freqs, freqs.length+1); - } - freqs[freqIndex++] = source.weight(); - } - - } - - @Override - public long weight() { - return freqs[curPos]; - } - - @Override - public BytesRef next() throws IOException { - if (++curPos < entries.size()) { - entries.get(spare, curPos); - return spare; - } - return null; - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java new file mode 100644 index 0000000..b78ec0e --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java @@ -0,0 +1,89 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Counter; + +/** + * This wrapper buffers incoming elements. 
+ * @lucene.experimental + */ +public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { + // TODO keep this for now + /** buffered term entries */ + protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); + /** buffered payload entries */ + protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter()); + /** current buffer position */ + protected int curPos = -1; + /** buffered weights, parallel with {@link #entries} */ + protected long[] freqs = new long[1]; + private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); + private final boolean hasPayloads; + + /** Creates a new iterator, buffering entries from the specified iterator */ + public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { + BytesRef spare; + int freqIndex = 0; + hasPayloads = source.hasPayloads(); + while((spare = source.next()) != null) { + entries.append(spare); + if (hasPayloads) { + payloads.append(source.payload()); + } + if (freqIndex >= freqs.length) { + freqs = ArrayUtil.grow(freqs, freqs.length+1); + } + freqs[freqIndex++] = source.weight(); + } + + } + + @Override + public long weight() { + return freqs[curPos]; + } + + @Override + public BytesRef next() throws IOException { + if (++curPos < entries.size()) { + entries.get(spare, curPos); + return spare; + } + return null; + } + + @Override + public BytesRef payload() { + if (hasPayloads && curPos < payloads.size()) { + return payloads.get(payloadSpare, curPos); + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java index 3519961..425d9db 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java @@ -47,12 +47,6 @@ import org.apache.lucene.util.BytesRefIterator; * The term, weight and (optionally) payload fields supplied * are required for ALL documents and has to be stored * - *
  • - * This Dictionary implementation is not compatible with the following Suggesters: - * {@link JaspellLookup}, {@link TSTLookup}, {@link FSTCompletionLookup}, - * {@link WFSTCompletionLookup} and {@link AnalyzingInfixSuggester}. - * see https://issues.apache.org/jira/browse/LUCENE-5260 - *
  • * */ public class DocumentDictionary implements Dictionary { @@ -95,7 +89,7 @@ public class DocumentDictionary implements Dictionary { final class TermWeightPayloadIterator implements TermFreqPayloadIterator { private final int docCount; private final Set relevantFields; - private final boolean withPayload; + private final boolean hasPayloads; private final Bits liveDocs; private int currentDocId = -1; private long currentWeight; @@ -106,13 +100,13 @@ public class DocumentDictionary implements Dictionary { * index. setting withPayload to false, implies an iterator * over only term and weight. */ - public TermWeightPayloadIterator(boolean withPayload) throws IOException { + public TermWeightPayloadIterator(boolean hasPayloads) throws IOException { docCount = reader.maxDoc() - 1; - this.withPayload = withPayload; + this.hasPayloads = hasPayloads; currentPayload = null; liveDocs = MultiFields.getLiveDocs(reader); List relevantFieldList; - if(withPayload) { + if(hasPayloads) { relevantFieldList = Arrays.asList(field, weightField, payloadField); } else { relevantFieldList = Arrays.asList(field, weightField); @@ -135,7 +129,7 @@ public class DocumentDictionary implements Dictionary { StoredDocument doc = reader.document(currentDocId, relevantFields); - if (withPayload) { + if (hasPayloads) { StorableField payload = doc.getField(payloadField); if (payload == null) { throw new IllegalArgumentException(payloadField + " does not exist"); @@ -169,6 +163,11 @@ public class DocumentDictionary implements Dictionary { public BytesRef payload() { return currentPayload; } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } } } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java index fa242ef..b03033b 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java @@ -21,7 +21,7 @@ package org.apache.lucene.search.suggest; import java.io.*; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; @@ -57,11 +57,11 @@ public class FileDictionary implements Dictionary { } @Override - public TermFreqIterator getWordsIterator() { + public TermFreqPayloadIterator getWordsIterator() { return new FileIterator(); } - final class FileIterator implements TermFreqIterator { + final class FileIterator implements TermFreqPayloadIterator { private long curFreq; private final BytesRef spare = new BytesRef(); @@ -98,5 +98,15 @@ public class FileDictionary implements Dictionary { return null; } } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } } } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java index a1c64d3..edee62b 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java @@ -24,7 +24,7 @@ import java.util.Comparator; import java.util.List; import org.apache.lucene.search.spell.Dictionary; -import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import 
org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.PriorityQueue; @@ -154,25 +154,25 @@ public abstract class Lookup { /** Build lookup from a dictionary. Some implementations may require sorted * or unsorted keys from the dictionary's iterator - use - * {@link SortedTermFreqIteratorWrapper} or - * {@link UnsortedTermFreqIteratorWrapper} in such case. + * {@link SortedTermFreqPayloadIteratorWrapper} or + * {@link UnsortedTermFreqPayloadIteratorWrapper} in such case. */ public void build(Dictionary dict) throws IOException { BytesRefIterator it = dict.getWordsIterator(); - TermFreqIterator tfit; - if (it instanceof TermFreqIterator) { - tfit = (TermFreqIterator)it; + TermFreqPayloadIterator tfit; + if (it instanceof TermFreqPayloadIterator) { + tfit = (TermFreqPayloadIterator)it; } else { - tfit = new TermFreqIterator.TermFreqIteratorWrapper(it); + tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it); } build(tfit); } /** - * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqIterator}. + * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}. * The implementation might re-sort the data internally. */ - public abstract void build(TermFreqIterator tfit) throws IOException; + public abstract void build(TermFreqPayloadIterator tfit) throws IOException; /** * Look up a key and return possible completion for this key. diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java deleted file mode 100644 index 53c4212..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java +++ /dev/null @@ -1,185 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.File; -import java.io.IOException; -import java.util.Comparator; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.search.suggest.Sort.ByteSequencesReader; -import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.IOUtils; - -/** - * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator. 
- * @lucene.experimental - */ -public class SortedTermFreqIteratorWrapper implements TermFreqIterator { - - private final TermFreqIterator source; - private File tempInput; - private File tempSorted; - private final ByteSequencesReader reader; - private final Comparator comparator; - private boolean done = false; - - private long weight; - private final BytesRef scratch = new BytesRef(); - - /** - * Creates a new sorted wrapper, using {@link - * BytesRef#getUTF8SortedAsUnicodeComparator} for - * sorting. */ - public SortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { - this(source, BytesRef.getUTF8SortedAsUnicodeComparator()); - } - - /** - * Creates a new sorted wrapper, sorting by BytesRef - * (ascending) then cost (ascending). - */ - public SortedTermFreqIteratorWrapper(TermFreqIterator source, Comparator comparator) throws IOException { - this.source = source; - this.comparator = comparator; - this.reader = sort(); - } - - @Override - public BytesRef next() throws IOException { - boolean success = false; - if (done) { - return null; - } - try { - ByteArrayDataInput input = new ByteArrayDataInput(); - if (reader.read(scratch)) { - weight = decode(scratch, input); - success = true; - return scratch; - } - close(); - success = done = true; - return null; - } finally { - if (!success) { - done = true; - close(); - } - } - } - - @Override - public long weight() { - return weight; - } - - /** Sortes by BytesRef (ascending) then cost (ascending). */ - private final Comparator tieBreakByCostComparator = new Comparator() { - - private final BytesRef leftScratch = new BytesRef(); - private final BytesRef rightScratch = new BytesRef(); - private final ByteArrayDataInput input = new ByteArrayDataInput(); - - @Override - public int compare(BytesRef left, BytesRef right) { - // Make shallow copy in case decode changes the BytesRef: - leftScratch.bytes = left.bytes; - leftScratch.offset = left.offset; - leftScratch.length = left.length; - rightScratch.bytes = right.bytes; - rightScratch.offset = right.offset; - rightScratch.length = right.length; - long leftCost = decode(leftScratch, input); - long rightCost = decode(rightScratch, input); - int cmp = comparator.compare(leftScratch, rightScratch); - if (cmp != 0) { - return cmp; - } - return Long.compare(leftCost, rightCost); - } - }; - - private Sort.ByteSequencesReader sort() throws IOException { - String prefix = getClass().getSimpleName(); - File directory = Sort.defaultTempDir(); - tempInput = File.createTempFile(prefix, ".input", directory); - tempSorted = File.createTempFile(prefix, ".sorted", directory); - - final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); - boolean success = false; - try { - BytesRef spare; - byte[] buffer = new byte[0]; - ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); - - while ((spare = source.next()) != null) { - encode(writer, output, buffer, spare, source.weight()); - } - writer.close(); - new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted); - ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted); - success = true; - return reader; - - } finally { - if (success) { - IOUtils.close(writer); - } else { - try { - IOUtils.closeWhileHandlingException(writer); - } finally { - close(); - } - } - } - } - - private void close() throws IOException { - IOUtils.close(reader); - if (tempInput != null) { - tempInput.delete(); - } - if (tempSorted != null) { - tempSorted.delete(); - } - } - - /** encodes an entry (bytes+weight) to the 
provided writer */ - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException { - if (spare.length + 8 >= buffer.length) { - buffer = ArrayUtil.grow(buffer, spare.length + 8); - } - output.reset(buffer); - output.writeBytes(spare.bytes, spare.offset, spare.length); - output.writeLong(weight); - writer.write(buffer, 0, output.getPosition()); - } - - /** decodes the weight at the current position */ - protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) { - tmpInput.reset(scratch.bytes); - tmpInput.skipBytes(scratch.length - 8); // suggestion - scratch.length -= 8; // long - return tmpInput.readLong(); - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java new file mode 100644 index 0000000..b8fa103 --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java @@ -0,0 +1,227 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.search.suggest.Sort.ByteSequencesReader; +import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; + +/** + * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator. + * @lucene.experimental + */ +public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator { + + private final TermFreqPayloadIterator source; + private File tempInput; + private File tempSorted; + private final ByteSequencesReader reader; + private final Comparator comparator; + private final boolean hasPayloads; + private boolean done = false; + + private long weight; + private final BytesRef scratch = new BytesRef(); + private BytesRef payload = new BytesRef(); + + /** + * Creates a new sorted wrapper, using {@link + * BytesRef#getUTF8SortedAsUnicodeComparator} for + * sorting. */ + public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { + this(source, BytesRef.getUTF8SortedAsUnicodeComparator()); + } + + /** + * Creates a new sorted wrapper, sorting by BytesRef + * (ascending) then cost (ascending). 
+ */ + public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator comparator) throws IOException { + this.hasPayloads = source.hasPayloads(); + this.source = source; + this.comparator = comparator; + this.reader = sort(); + } + + @Override + public BytesRef next() throws IOException { + boolean success = false; + if (done) { + return null; + } + try { + ByteArrayDataInput input = new ByteArrayDataInput(); + if (reader.read(scratch)) { + weight = decode(scratch, input); + if (hasPayloads) { + payload = decodePayload(scratch, input); + } + success = true; + return scratch; + } + close(); + success = done = true; + return null; + } finally { + if (!success) { + done = true; + close(); + } + } + } + + @Override + public long weight() { + return weight; + } + + @Override + public BytesRef payload() { + if (hasPayloads) { + return payload; + } + return null; + } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } + + /** Sortes by BytesRef (ascending) then cost (ascending). */ + private final Comparator tieBreakByCostComparator = new Comparator() { + + private final BytesRef leftScratch = new BytesRef(); + private final BytesRef rightScratch = new BytesRef(); + private final ByteArrayDataInput input = new ByteArrayDataInput(); + + @Override + public int compare(BytesRef left, BytesRef right) { + // Make shallow copy in case decode changes the BytesRef: + leftScratch.bytes = left.bytes; + leftScratch.offset = left.offset; + leftScratch.length = left.length; + rightScratch.bytes = right.bytes; + rightScratch.offset = right.offset; + rightScratch.length = right.length; + long leftCost = decode(leftScratch, input); + long rightCost = decode(rightScratch, input); + if (hasPayloads) { + decodePayload(leftScratch, input); + decodePayload(rightScratch, input); + } + int cmp = comparator.compare(leftScratch, rightScratch); + if (cmp != 0) { + return cmp; + } + return Long.compare(leftCost, rightCost); + } + }; + + private Sort.ByteSequencesReader sort() throws IOException { + String prefix = getClass().getSimpleName(); + File directory = Sort.defaultTempDir(); + tempInput = File.createTempFile(prefix, ".input", directory); + tempSorted = File.createTempFile(prefix, ".sorted", directory); + + final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); + boolean success = false; + try { + BytesRef spare; + byte[] buffer = new byte[0]; + ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); + + while ((spare = source.next()) != null) { + encode(writer, output, buffer, spare, source.payload(), source.weight()); + } + writer.close(); + new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted); + ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted); + success = true; + return reader; + + } finally { + if (success) { + IOUtils.close(writer); + } else { + try { + IOUtils.closeWhileHandlingException(writer); + } finally { + close(); + } + } + } + } + + private void close() throws IOException { + IOUtils.close(reader); + if (tempInput != null) { + tempInput.delete(); + } + if (tempSorted != null) { + tempSorted.delete(); + } + } + + /** encodes an entry (bytes+(payload)+weight) to the provided writer */ + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { + int requiredLength = spare.length + 8 + ((hasPayloads) ? 
2 + payload.length : 0); + if (requiredLength >= buffer.length) { + buffer = ArrayUtil.grow(buffer, requiredLength); + } + output.reset(buffer); + output.writeBytes(spare.bytes, spare.offset, spare.length); + if (hasPayloads) { + output.writeBytes(payload.bytes, payload.offset, payload.length); + output.writeShort((short) payload.length); + } + output.writeLong(weight); + writer.write(buffer, 0, output.getPosition()); + } + + /** decodes the weight at the current position */ + protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) { + tmpInput.reset(scratch.bytes); + tmpInput.skipBytes(scratch.length - 8); // suggestion + scratch.length -= 8; // long + return tmpInput.readLong(); + } + + /** decodes the payload at the current position */ + protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) { + tmpInput.reset(scratch.bytes); + tmpInput.skipBytes(scratch.length - 2); // skip to payload size + short payloadLength = tmpInput.readShort(); // read payload size + tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload + BytesRef payloadScratch = new BytesRef(payloadLength); + tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload + payloadScratch.length = payloadLength; + scratch.length -= 2; // payload length info (short) + scratch.length -= payloadLength; // payload + return payloadScratch; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java deleted file mode 100644 index c242195..0000000 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.Random; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.util.BytesRef; - -/** - * This wrapper buffers the incoming elements and makes sure they are in - * random order. - * @lucene.experimental - */ -public class UnsortedTermFreqIteratorWrapper extends BufferingTermFreqIteratorWrapper { - // TODO keep this for now - private final int[] ords; - private int currentOrd = -1; - private final BytesRef spare = new BytesRef(); - /** - * Creates a new iterator, wrapping the specified iterator and - * returning elements in a random order. 
- */ - public UnsortedTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { - super(source); - ords = new int[entries.size()]; - Random random = new Random(); - for (int i = 0; i < ords.length; i++) { - ords[i] = i; - } - for (int i = 0; i < ords.length; i++) { - int randomPosition = random.nextInt(ords.length); - int temp = ords[i]; - ords[i] = ords[randomPosition]; - ords[randomPosition] = temp; - } - } - - @Override - public long weight() { - return freqs[currentOrd]; - } - - @Override - public BytesRef next() throws IOException { - if (++curPos < entries.size()) { - return entries.get(spare, (currentOrd = ords[curPos])); - } - return null; - } -} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java new file mode 100644 index 0000000..8aad73b --- /dev/null +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java @@ -0,0 +1,79 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.util.BytesRef; + +/** + * This wrapper buffers the incoming elements and makes sure they are in + * random order. + * @lucene.experimental + */ +public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper { + // TODO keep this for now + private final int[] ords; + private int currentOrd = -1; + private final BytesRef spare = new BytesRef(); + private final BytesRef payloadSpare = new BytesRef(); + /** + * Creates a new iterator, wrapping the specified iterator and + * returning elements in a random order. 
+ */ + public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException { + super(source); + ords = new int[entries.size()]; + Random random = new Random(); + for (int i = 0; i < ords.length; i++) { + ords[i] = i; + } + for (int i = 0; i < ords.length; i++) { + int randomPosition = random.nextInt(ords.length); + int temp = ords[i]; + ords[i] = ords[randomPosition]; + ords[randomPosition] = temp; + } + } + + @Override + public long weight() { + assert currentOrd == ords[curPos]; + return freqs[currentOrd]; + } + + @Override + public BytesRef next() throws IOException { + if (++curPos < entries.size()) { + currentOrd = ords[curPos]; + return entries.get(spare, currentOrd); + } + return null; + } + + @Override + public BytesRef payload() { + if (hasPayloads() && curPos < payloads.size()) { + assert currentOrd == ords[curPos]; + return payloads.get(payloadSpare, currentOrd); + } + return null; + } +} diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index 632023d..27d73b8 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -65,7 +65,6 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs import org.apache.lucene.search.suggest.Lookup; @@ -176,19 +175,14 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { } @Override - public void build(TermFreqIterator iter) throws IOException { + public void build(TermFreqPayloadIterator iter) throws IOException { if (searcher != null) { searcher.getIndexReader().close(); searcher = null; } - TermFreqPayloadIterator payloads; - if (iter instanceof TermFreqPayloadIterator) { - payloads = (TermFreqPayloadIterator) iter; - } else { - payloads = null; - } + Directory dirTmp = getDirectory(new File(indexPath.toString() + ".tmp")); IndexWriter w = null; @@ -236,7 +230,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { doc.add(weightField); Field payloadField; - if (payloads != null) { + if (iter.hasPayloads()) { payloadField = new BinaryDocValuesField("payloads", new BytesRef()); doc.add(payloadField); } else { @@ -250,8 +244,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { textGramField.setStringValue(textString); textDVField.setBytesValue(text); weightField.setLongValue(iter.weight()); - if (payloads != null) { - payloadField.setBytesValue(payloads.payload()); + if (iter.hasPayloads()) { + payloadField.setBytesValue(iter.payload()); } w.addDocument(doc); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index 77f0f1c..0b6ff71 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -31,7 +31,6 @@ import java.util.Set; import 
org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStreamToAutomaton; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; @@ -381,19 +380,13 @@ public class AnalyzingSuggester extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { + public void build(TermFreqPayloadIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); File directory = Sort.defaultTempDir(); File tempInput = File.createTempFile(prefix, ".input", directory); File tempSorted = File.createTempFile(prefix, ".sorted", directory); - TermFreqPayloadIterator payloads; - if (iterator instanceof TermFreqPayloadIterator) { - payloads = (TermFreqPayloadIterator) iterator; - } else { - payloads = null; - } - hasPayloads = payloads != null; + hasPayloads = iterator.hasPayloads(); Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); Sort.ByteSequencesReader reader = null; @@ -432,7 +425,7 @@ public class AnalyzingSuggester extends Lookup { if (surfaceForm.length > (Short.MAX_VALUE-2)) { throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")"); } - payload = payloads.payload(); + payload = iterator.payload(); // payload + surfaceLength (short) requiredLength += payload.length + 2; } else { @@ -470,7 +463,7 @@ public class AnalyzingSuggester extends Lookup { writer.close(); // Sort all input/output pairs (required by FST.Builder): - new Sort(new AnalyzingComparator(payloads != null)).sort(tempInput, tempSorted); + new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted); // Free disk space: tempInput.delete(); diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index d2f652d..cee929b 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -54,7 +54,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; @@ -274,15 +273,15 @@ public class FreeTextSuggester extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { + public void build(TermFreqPayloadIterator iterator) throws IOException { build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB); } /** Build the suggest index, using up to the specified * amount of temporary RAM while building. Note that * the weights for the suggestions are ignored. 
*/ - public void build(TermFreqIterator iterator, double ramBufferSizeMB) throws IOException { - if (iterator instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("payloads are not supported"); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java index 2f4fe05..2bc0aec 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java @@ -24,7 +24,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.SortInfo; @@ -43,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs; * An adapter from {@link Lookup} API to {@link FSTCompletion}. * *

    This adapter differs from {@link FSTCompletion} in that it attempts - * to discretize any "weights" as passed from in {@link TermFreqIterator#weight()} + * to discretize any "weights" as passed from in {@link TermFreqPayloadIterator#weight()} * to match the number of buckets. For the rationale for bucketing, see * {@link FSTCompletion}. * @@ -96,7 +95,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqIterator)} method. The number of weight + * {@link #build(TermFreqPayloadIterator)} method. The number of weight * discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and * exact matches are promoted to the top of the suggestions list. */ @@ -106,7 +105,7 @@ public class FSTCompletionLookup extends Lookup { /** * This constructor prepares for creating a suggested FST using the - * {@link #build(TermFreqIterator)} method. + * {@link #build(TermFreqPayloadIterator)} method. * * @param buckets * The number of weight discretization buckets (see @@ -141,8 +140,8 @@ public class FSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } File tempInput = File.createTempFile( diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java index f634bee..982cab5 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java @@ -25,11 +25,10 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; -import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.InputStreamDataInput; @@ -93,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup { } @Override - public void build(TermFreqIterator iterator) throws IOException { - if (iterator instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator iterator) throws IOException { + if (iterator.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } BytesRef scratch = new BytesRef(); - TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator); + TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator); IntsRef scratchInts = new IntsRef(); BytesRef previous = null; PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(); @@ -255,14 +254,15 @@ public class WFSTCompletionLookup extends Lookup { return Integer.MAX_VALUE - (int)value; } - private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqIteratorWrapper { + private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper { - 
WFSTTermFreqIteratorWrapper(TermFreqIterator source) throws IOException { + WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException { super(source); + assert source.hasPayloads() == false; } @Override - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException { + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { if (spare.length + 4 >= buffer.length) { buffer = ArrayUtil.grow(buffer, spare.length + 4); } diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java index 558e115..83ac512 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java @@ -25,7 +25,6 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; @@ -47,13 +46,13 @@ public class JaspellLookup extends Lookup { /** * Creates a new empty trie - * @see #build(TermFreqIterator) + * @see #build(TermFreqPayloadIterator) * */ public JaspellLookup() {} @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } trie = new JaspellTernarySearchTrie(); diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java index 852ebb5..6eb173c 100644 --- lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java @@ -25,10 +25,9 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; -import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; +import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; @@ -46,19 +45,19 @@ public class TSTLookup extends Lookup { /** * Creates a new TSTLookup with an empty Ternary Search Tree. 
- * @see #build(TermFreqIterator) + * @see #build(TermFreqPayloadIterator) */ public TSTLookup() {} @Override - public void build(TermFreqIterator tfit) throws IOException { - if (tfit instanceof TermFreqPayloadIterator) { + public void build(TermFreqPayloadIterator tfit) throws IOException { + if (tfit.hasPayloads()) { throw new IllegalArgumentException("this suggester doesn't support payloads"); } root = new TernaryTreeNode(); // make sure it's sorted and the comparator uses UTF16 sort order - tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); + tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator()); ArrayList tokens = new ArrayList(); ArrayList vals = new ArrayList(); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java index f71318c..f57d5d3 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java @@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Input term/weight pairs. */ - private static TermFreq [] dictionaryInput; + private static TermFreqPayload [] dictionaryInput; /** * Benchmark term/weight pairs (randomized order). */ - private static List benchmarkInput; + private static List benchmarkInput; /** * Loads terms and frequencies from Wikipedia (cached). @@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase { @BeforeClass public static void setup() throws Exception { assert false : "disable assertions before running benchmarks!"; - List input = readTop50KWiki(); + List input = readTop50KWiki(); Collections.shuffle(input, random); - LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreq [input.size()]); + LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]); Collections.shuffle(input, random); LookupBenchmarkTest.benchmarkInput = input; } @@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Collect the multilingual input for benchmarks/ tests. */ - public static List readTop50KWiki() throws Exception { - List input = new ArrayList(); + public static List readTop50KWiki() throws Exception { + List input = new ArrayList(); URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8"); assert resource != null : "Resource missing: Top50KWiki.utf8"; @@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { assertTrue("No | separator?: " + line, tab >= 0); int weight = Integer.parseInt(line.substring(tab + 1)); String key = line.substring(0, tab); - input.add(new TermFreq(key, weight)); + input.add(new TermFreqPayload(key, weight)); } br.close(); return input; @@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { /** * Create {@link Lookup} instance and populate it. 
*/ - private Lookup buildLookup(Class cls, TermFreq[] input) throws Exception { + private Lookup buildLookup(Class cls, TermFreqPayload[] input) throws Exception { Lookup lookup = null; try { lookup = cls.newInstance(); @@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { lookup = ctor.newInstance(a); } } - lookup.build(new TermFreqArrayIterator(input)); + lookup.build(new TermFreqPayloadArrayIterator(input)); return lookup; } @@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase { final Lookup lookup = buildLookup(cls, dictionaryInput); final List input = new ArrayList(benchmarkInput.size()); - for (TermFreq tf : benchmarkInput) { + for (TermFreqPayload tf : benchmarkInput) { String s = tf.term.utf8ToString(); String sub = s.substring(0, Math.min(s.length(), minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1))); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java index ef948a8..2439857 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java @@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase { // Add all input keys. Lookup lookup = lookupClass.newInstance(); - TermFreq[] keys = new TermFreq[this.keys.length]; + TermFreqPayload[] keys = new TermFreqPayload[this.keys.length]; for (int i = 0; i < keys.length; i++) - keys[i] = new TermFreq(this.keys[i], i); - lookup.build(new TermFreqArrayIterator(keys)); + keys[i] = new TermFreqPayload(this.keys[i], i); + lookup.build(new TermFreqPayloadArrayIterator(keys)); // Store the suggester. File storeDir = TEMP_DIR; @@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase { // Assert validity. Random random = random(); long previous = Long.MIN_VALUE; - for (TermFreq k : keys) { + for (TermFreqPayload k : keys) { List list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1); assertEquals(1, list.size()); LookupResult lookupResult = list.get(0); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java deleted file mode 100644 index 2b02ac1..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreq.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.apache.lucene.search.suggest; - -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -public final class TermFreq { - public final BytesRef term; - public final long v; - - public TermFreq(String term, long v) { - this(new BytesRef(term), v); - } - - public TermFreq(BytesRef term, long v) { - this.term = term; - this.v = v; - } -} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java deleted file mode 100644 index d77fa5c..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqArrayIterator.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.Arrays; -import java.util.Iterator; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.util.BytesRef; - -/** - * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s. - */ -public final class TermFreqArrayIterator implements TermFreqIterator { - private final Iterator i; - private TermFreq current; - private final BytesRef spare = new BytesRef(); - - public TermFreqArrayIterator(Iterator i) { - this.i = i; - } - - public TermFreqArrayIterator(TermFreq [] i) { - this(Arrays.asList(i)); - } - - public TermFreqArrayIterator(Iterable i) { - this(i.iterator()); - } - - @Override - public long weight() { - return current.v; - } - - @Override - public BytesRef next() { - if (i.hasNext()) { - current = i.next(); - spare.copyBytes(current.term); - return spare; - } - return null; - } -} \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java index 7640281..5463a13 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java @@ -23,14 +23,32 @@ public final class TermFreqPayload { public final BytesRef term; public final long v; public final BytesRef payload; + public final boolean hasPayloads; + public TermFreqPayload(BytesRef term, long v, BytesRef payload) { + this(term, v, payload, true); + } + public TermFreqPayload(String term, long v, BytesRef payload) { - this(new BytesRef(term), v, payload); + this(new BytesRef(term), v, payload, true); } - public TermFreqPayload(BytesRef term, long v, BytesRef payload) { + public TermFreqPayload(BytesRef term, long v) { + this(term, v, null, false); + } + + public TermFreqPayload(String term, long v) { + this(new BytesRef(term), v, null, false); + } + + public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) { this.term = term; this.v = v; this.payload = payload; 
+ this.hasPayloads = hasPayloads; + } + + public boolean hasPayloads() { + return hasPayloads; } } \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java index 5bfb073..6583f73 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java @@ -20,26 +20,33 @@ package org.apache.lucene.search.suggest; import java.util.Arrays; import java.util.Iterator; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.util.BytesRef; /** - * A {@link TermFreqIterator} over a sequence of {@link TermFreq}s. + * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s. */ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator { private final Iterator i; + private final boolean hasPayloads; + private boolean first; private TermFreqPayload current; private final BytesRef spare = new BytesRef(); public TermFreqPayloadArrayIterator(Iterator i) { this.i = i; + if (i.hasNext()) { + current = i.next(); + first = true; + this.hasPayloads = current.hasPayloads; + } else { + this.hasPayloads = false; + } } public TermFreqPayloadArrayIterator(TermFreqPayload[] i) { this(Arrays.asList(i)); } - public TermFreqPayloadArrayIterator(Iterable i) { this(i.iterator()); } @@ -51,8 +58,12 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat @Override public BytesRef next() { - if (i.hasNext()) { - current = i.next(); + if (i.hasNext() || (first && current!=null)) { + if (first) { + first = false; + } else { + current = i.next(); + } spare.copyBytes(current.term); return spare; } @@ -63,4 +74,9 @@ public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterat public BytesRef payload() { return current.payload; } + + @Override + public boolean hasPayloads() { + return hasPayloads; + } } \ No newline at end of file diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java deleted file mode 100644 index 3209b1a..0000000 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqIterator.java +++ /dev/null @@ -1,92 +0,0 @@ -package org.apache.lucene.search.suggest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to You under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. 
- */ - -import java.util.Comparator; -import java.util.Iterator; -import java.util.Map; -import java.util.Random; -import java.util.TreeMap; - -import org.apache.lucene.search.spell.TermFreqIterator; -import org.apache.lucene.store.ByteArrayDataOutput; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefHash; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -public class TestTermFreqIterator extends LuceneTestCase { - public void testEmpty() throws Exception { - TermFreqArrayIterator iterator = new TermFreqArrayIterator(new TermFreq[0]); - TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); - assertNull(wrapper.next()); - wrapper = new UnsortedTermFreqIteratorWrapper(iterator); - assertNull(wrapper.next()); - } - - public void testTerms() throws Exception { - Random random = random(); - int num = atLeast(10000); - - Comparator comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); - TreeMap sorted = new TreeMap(comparator); - TermFreq[] unsorted = new TermFreq[num]; - - for (int i = 0; i < num; i++) { - BytesRef key; - do { - key = new BytesRef(_TestUtil.randomUnicodeString(random)); - } while (sorted.containsKey(key)); - long value = random.nextLong(); - sorted.put(key, value); - unsorted[i] = new TermFreq(key, value); - } - - // test the sorted iterator wrapper - TermFreqIterator wrapper = new SortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted), comparator); - Iterator> expected = sorted.entrySet().iterator(); - while (expected.hasNext()) { - Map.Entry entry = expected.next(); - - assertEquals(entry.getKey(), wrapper.next()); - assertEquals(entry.getValue().longValue(), wrapper.weight()); - } - assertNull(wrapper.next()); - - // test the unsorted iterator wrapper - wrapper = new UnsortedTermFreqIteratorWrapper(new TermFreqArrayIterator(unsorted)); - TreeMap actual = new TreeMap(); - BytesRef key; - while ((key = wrapper.next()) != null) { - long value = wrapper.weight(); - actual.put(BytesRef.deepCopyOf(key), value); - } - assertEquals(sorted, actual); - } - - public static long asLong(BytesRef b) { - return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, - b.offset + 4) & 0xFFFFFFFFL); - } - - private static int asIntInternal(BytesRef b, int pos) { - return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) - | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); - } -} diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java new file mode 100644 index 0000000..e7d8257 --- /dev/null +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java @@ -0,0 +1,124 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +import java.util.AbstractMap.SimpleEntry; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.Random; +import java.util.TreeMap; + +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.store.ByteArrayDataOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestTermFreqPayloadIterator extends LuceneTestCase { + + public void testEmpty() throws Exception { + TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]); + TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator()); + assertNull(wrapper.next()); + wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator); + assertNull(wrapper.next()); + } + + public void testTerms() throws Exception { + Random random = random(); + int num = atLeast(10000); + + Comparator comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); + TreeMap> sorted = new TreeMap<>(comparator); + TreeMap sortedWithoutPayload = new TreeMap<>(comparator); + TermFreqPayload[] unsorted = new TermFreqPayload[num]; + TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num]; + + for (int i = 0; i < num; i++) { + BytesRef key; + BytesRef payload; + do { + key = new BytesRef(_TestUtil.randomUnicodeString(random)); + payload = new BytesRef(_TestUtil.randomUnicodeString(random)); + } while (sorted.containsKey(key)); + long value = random.nextLong(); + sortedWithoutPayload.put(key, value); + sorted.put(key, new SimpleEntry<>(value, payload)); + unsorted[i] = new TermFreqPayload(key, value, payload); + unsortedWithoutPayload[i] = new TermFreqPayload(key, value); + } + + // test the sorted iterator wrapper with payloads + TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator); + Iterator>> expected = sorted.entrySet().iterator(); + while (expected.hasNext()) { + Map.Entry> entry = expected.next(); + + assertEquals(entry.getKey(), wrapper.next()); + assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); + assertEquals(entry.getValue().getValue(), wrapper.payload()); + } + assertNull(wrapper.next()); + + // test the unsorted iterator wrapper with payloads + wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted)); + TreeMap> actual = new TreeMap<>(); + BytesRef key; + while ((key = wrapper.next()) != null) { + long value = wrapper.weight(); + BytesRef payload = wrapper.payload(); + actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload))); + } + assertEquals(sorted, actual); + + // test the sorted iterator wrapper without payloads + TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new 
TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator); + Iterator> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator(); + while (expectedWithoutPayload.hasNext()) { + Map.Entry entry = expectedWithoutPayload.next(); + + assertEquals(entry.getKey(), wrapperWithoutPayload.next()); + assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight()); + assertNull(wrapperWithoutPayload.payload()); + } + assertNull(wrapperWithoutPayload.next()); + + // test the unsorted iterator wrapper without payloads + wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload)); + TreeMap actualWithoutPayload = new TreeMap<>(); + while ((key = wrapperWithoutPayload.next()) != null) { + long value = wrapperWithoutPayload.weight(); + assertNull(wrapperWithoutPayload.payload()); + actualWithoutPayload.put(BytesRef.deepCopyOf(key), value); + } + assertEquals(sortedWithoutPayload, actualWithoutPayload); + } + + public static long asLong(BytesRef b) { + return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b, + b.offset + 4) & 0xFFFFFFFFL); + } + + private static int asIntInternal(BytesRef b, int pos) { + return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16) + | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF); + } +} diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java index 995f60d..f367f36 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java @@ -52,8 +52,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; import org.apache.lucene.search.suggest.TermFreqPayload; import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; @@ -65,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. 
so it acts the same */ public void testKeyword() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6), - new TermFreq("bar", 5), - new TermFreq("barbara", 1) + Iterable keys = shuffle( + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6), + new TermFreqPayload("bar", 5), + new TermFreqPayload("barbara", 1) ); AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2); @@ -165,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testRandomRealisticKeys() throws IOException { LineFileDocs lineFile = new LineFileDocs(random()); Map mapping = new HashMap<>(); - List keys = new ArrayList<>(); + List keys = new ArrayList<>(); int howMany = atLeast(100); // this might bring up duplicates for (int i = 0; i < howMany; i++) { Document nextDoc = lineFile.nextDoc(); String title = nextDoc.getField("title").stringValue(); int randomWeight = random().nextInt(100); - keys.add(new TermFreq(title, randomWeight)); + keys.add(new TermFreqPayload(title, randomWeight)); if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) { mapping.put(title, Long.valueOf(randomWeight)); } @@ -183,15 +181,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { boolean doPayloads = random().nextBoolean(); if (doPayloads) { List keysAndPayloads = new ArrayList<>(); - for (TermFreq termFreq : keys) { + for (TermFreqPayload termFreq : keys) { keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v)))); } analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads)); } else { - analyzingSuggester.build(new TermFreqArrayIterator(keys)); + analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys)); } - for (TermFreq termFreq : keys) { + for (TermFreqPayload termFreq : keys) { List lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size()); for (LookupResult lookupResult : lookup) { assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value)); @@ -211,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("the ghost of christmas past", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -241,23 +239,23 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testEmpty() throws Exception { 
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); AnalyzingSuggester suggester = new AnalyzingSuggester(standard); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); } public void testNoSeps() throws Exception { - TermFreq[] keys = new TermFreq[] { - new TermFreq("ab cd", 0), - new TermFreq("abcd", 1), + TermFreqPayload[] keys = new TermFreqPayload[] { + new TermFreqPayload("ab cd", 0), + new TermFreqPayload("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -318,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("wifi network is slow", 50), - new TermFreq("wi fi network is fast", 10), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("wifi network is slow", 50), + new TermFreqPayload("wi fi network is fast", 10), }; //AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1); AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { System.out.println("Results: " + results); @@ -384,12 +382,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("ab xc", 50), - new TermFreq("ba xd", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("ab xc", 50), + new TermFreqPayload("ba xd", 50), }; AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -462,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -502,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new 
TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -657,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { boolean doPayloads = random().nextBoolean(); - TermFreq[] keys = null; + TermFreqPayload[] keys = null; TermFreqPayload[] payloadKeys = null; if (doPayloads) { payloadKeys = new TermFreqPayload[numQueries]; } else { - keys = new TermFreq[numQueries]; + keys = new TermFreqPayload[numQueries]; } boolean preserveSep = random().nextBoolean(); @@ -735,7 +733,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { payload = new BytesRef(bytes); payloadKeys[i] = new TermFreqPayload(key, weight, payload); } else { - keys[i] = new TermFreq(key, weight); + keys[i] = new TermFreqPayload(key, weight); payload = null; } @@ -758,7 +756,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { if (doPayloads) { suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys))); } else { - suggester.build(new TermFreqArrayIterator(shuffle(keys))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys))); } for (String prefix : allPrefixes) { @@ -876,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1); - suggester.build(new TermFreqArrayIterator(shuffle(new TermFreq("a", 40), - new TermFreq("a ", 50), new TermFreq(" a", 60)))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40), + new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60)))); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -891,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 2), - new TermFreq("a b c", 3), - new TermFreq("a c a", 1), - new TermFreq("a c b", 1), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a", 2), + new TermFreqPayload("a b c", 3), + new TermFreqPayload("a c a", 1), + new TermFreqPayload("a c b", 1), })); suggester.lookup("a", false, 4); @@ -907,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 5), - new TermFreq("a b", 3), - new TermFreq("a c", 4), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a", 5), + new TermFreqPayload("a b", 3), + new TermFreqPayload("a c", 4), })); List results = suggester.lookup("a", false, 3); @@ -972,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(shuffle( - new TermFreq("hambone", 6), - new TermFreq("nellie", 5)))); + suggester.build(new TermFreqPayloadArrayIterator(shuffle( + new TermFreqPayload("hambone", 6), + new TermFreqPayload("nellie", 5)))); List results = suggester.lookup("nellie", false, 
2); assertEquals(2, results.size()); @@ -1041,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a", 6), - new TermFreq("b", 5), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a", 6), + new TermFreqPayload("b", 5), })); List results = suggester.lookup("a", false, 2); @@ -1114,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a a", 50), - new TermFreq("a b", 50), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a a", 50), + new TermFreqPayload("a b", 50), })); } public void testDupSurfaceFormsMissingResults3() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("a a", 7), - new TermFreq("a a", 7), - new TermFreq("a c", 6), - new TermFreq("a c", 3), - new TermFreq("a b", 5), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("a a", 7), + new TermFreqPayload("a a", 7), + new TermFreqPayload("a c", 6), + new TermFreqPayload("a c", 3), + new TermFreqPayload("a b", 5), })); assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString()); } @@ -1136,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { public void testEndingSpace() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("i love lucy", 7), - new TermFreq("isla de muerta", 8), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("i love lucy", 7), + new TermFreqPayload("isla de muerta", 8), })); assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString()); assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString()); @@ -1169,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase { }; AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] {new TermFreq("a", 1)})); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)})); assertEquals("[a/1]", suggester.lookup("a", false, 1).toString()); } public void testIllegalLookupArgument() throws Exception { Analyzer a = new MockAnalyzer(random()); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("а где Люси?", 7), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("а где Люси?", 7), })); try { suggester.lookup("а\u001E", false, 3); diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java index 06556f5..e65f2bc 100644 --- 
lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java @@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; +import org.apache.lucene.search.suggest.TermFreqPayload; +import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; @@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util; public class FuzzySuggesterTest extends LuceneTestCase { public void testRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new TermFreq("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); } - keys.add(new TermFreq("foo bar boo far", 12)); + keys.add(new TermFreqPayload("foo bar boo far", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; i < numIters; i++) { String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX); @@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNonLatinRandomEdits() throws IOException { - List keys = new ArrayList(); + List keys = new ArrayList(); int numTerms = atLeast(100); for (int i = 0; i < numTerms; i++) { - keys.add(new TermFreq("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); + keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100))); } - keys.add(new TermFreq("фуу бар буу фар", 12)); + keys.add(new TermFreqPayload("фуу бар буу фар", 12)); MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false); FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS, 0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); int numIters = atLeast(10); for (int i = 0; i < numIters; i++) { String addRandomEdit = addRandomEdit("фуу бар буу", 0); @@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends LuceneTestCase { /** this is basically the WFST test ported to KeywordAnalyzer. 
so it acts the same */ public void testKeyword() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6) + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6) }; FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2); assertEquals(2, results.size()); @@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { * basic "standardanalyzer" test with stopword removal */ public void testStandard() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("the ghost of christmas past", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("the ghost of christmas past", 50), }; Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET); FuzzySuggester suggester = new FuzzySuggester(standard); suggester.setPreservePositionIncrements(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1); assertEquals(1, results.size()); @@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase { } public void testNoSeps() throws Exception { - TermFreq[] keys = new TermFreq[] { - new TermFreq("ab cd", 0), - new TermFreq("abcd", 1), + TermFreqPayload[] keys = new TermFreqPayload[] { + new TermFreqPayload("ab cd", 0), + new TermFreqPayload("abcd", 1), }; int options = 0; Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // TODO: would be nice if "ab " would allow the test to // pass, and more generally if the analyzer can know // that the user's current query has ended at a word, @@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new TermFreq("wifi network is slow", 50), - new TermFreq("wi fi network is fast", 10), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("wifi network is slow", 50), + new TermFreqPayload("wi fi network is fast", 10), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("wifi network", false, 10); if (VERBOSE) { @@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testEmpty() throws Exception { FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); @@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase { } }; - TermFreq keys[] = new TermFreq[] { - new 
TermFreq("ab xc", 50), - new TermFreq("ba xd", 50), + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("ab xc", 50), + new TermFreqPayload("ba xd", 50), }; FuzzySuggester suggester = new FuzzySuggester(analyzer); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("ab x", false, 1); assertTrue(results.size() == 1); } @@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); //System.out.println("ALL: " + suggester.lookup("x y", false, 6)); @@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = getUnusualAnalyzer(); FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 1), - new TermFreq("x y z", 3), - new TermFreq("x", 2), - new TermFreq("z z z", 20), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 1), + new TermFreqPayload("x y z", 3), + new TermFreqPayload("x", 2), + new TermFreqPayload("z z z", 20), })); for(int topN=1;topN<6;topN++) { @@ -491,19 +491,19 @@ public class FuzzySuggesterTest extends LuceneTestCase { } // Holds surface form separately: - private static class TermFreq2 implements Comparable { + private static class TermFreqPayload2 implements Comparable { public final String surfaceForm; public final String analyzedForm; public final long weight; - public TermFreq2(String surfaceForm, String analyzedForm, long weight) { + public TermFreqPayload2(String surfaceForm, String analyzedForm, long weight) { this.surfaceForm = surfaceForm; this.analyzedForm = analyzedForm; this.weight = weight; } @Override - public int compareTo(TermFreq2 other) { + public int compareTo(TermFreqPayload2 other) { int cmp = analyzedForm.compareTo(other.analyzedForm); if (cmp != 0) { return cmp; @@ -596,11 +596,11 @@ public class FuzzySuggesterTest extends LuceneTestCase { int numQueries = atLeast(100); - final List slowCompletor = new ArrayList(); + final List slowCompletor = new ArrayList(); final TreeSet allPrefixes = new TreeSet(); final Set seen = new HashSet(); - TermFreq[] keys = new TermFreq[numQueries]; + TermFreqPayload[] keys = new TermFreqPayload[numQueries]; boolean preserveSep = random().nextBoolean(); boolean unicodeAware = random().nextBoolean(); @@ -666,17 +666,17 @@ public class FuzzySuggesterTest extends LuceneTestCase { } // we can probably do Integer.MAX_VALUE here, but why worry. 
int weight = random().nextInt(1<<24); - keys[i] = new TermFreq(key, weight); + keys[i] = new TermFreqPayload(key, weight); - slowCompletor.add(new TermFreq2(key, analyzedKey, weight)); + slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight)); } if (VERBOSE) { // Don't just sort original list, to avoid VERBOSE // altering the test: - List sorted = new ArrayList(slowCompletor); + List sorted = new ArrayList(slowCompletor); Collections.sort(sorted); - for(TermFreq2 ent : sorted) { + for(TermFreqPayload2 ent : sorted) { System.out.println(" surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight); } } @@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); FuzzySuggester suggester = new FuzzySuggester(a, a, preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); for (String prefix : allPrefixes) { @@ -756,7 +756,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { assertTrue(automaton.isDeterministic()); // TODO: could be faster... but its slowCompletor for a reason BytesRef spare = new BytesRef(); - for (TermFreq2 e : slowCompletor) { + for (TermFreqPayload2 e : slowCompletor) { spare.copyChars(e.analyzedForm); Set finiteStrings = suggester.toFiniteStrings(spare, tokenStreamToAutomaton); for (IntsRef intsRef : finiteStrings) { @@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false); - List keys = Arrays.asList(new TermFreq[] { - new TermFreq("a", 40), - new TermFreq("a ", 50), - new TermFreq(" a", 60), + List keys = Arrays.asList(new TermFreqPayload[] { + new TermFreqPayload("a", 40), + new TermFreqPayload("a ", 50), + new TermFreqPayload(" a", 60), }); Collections.shuffle(keys, random()); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); List results = suggester.lookup("a", false, 5); assertEquals(2, results.size()); @@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase { Analyzer a = new MockAnalyzer(random()); FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false); - List keys = Arrays.asList(new TermFreq[] { - new TermFreq("foo bar", 40), - new TermFreq("foo bar baz", 50), - new TermFreq("barbaz", 60), - new TermFreq("barbazfoo", 10), + List keys = Arrays.asList(new TermFreqPayload[] { + new TermFreqPayload("foo bar", 40), + new TermFreqPayload("foo bar baz", 50), + new TermFreqPayload("barbaz", 60), + new TermFreqPayload("barbazfoo", 10), }); Collections.shuffle(keys, random()); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString()); assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString()); @@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase { public void testRandom2() throws Throwable { final int NUM = atLeast(200); - final List answers = new ArrayList(); + final List answers = new ArrayList(); final Set seen = new HashSet(); for(int i=0;i() { + Collections.sort(answers, new Comparator() { @Override - public int 
compare(TermFreq a, TermFreq b) { + public int compare(TermFreqPayload a, TermFreqPayload b) { return a.term.compareTo(b.term); } }); if (VERBOSE) { System.out.println("\nTEST: targets"); - for(TermFreq tf : answers) { + for(TermFreqPayload tf : answers) { System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v); } } @@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase { } Collections.shuffle(answers, random()); - suggest.build(new TermFreqArrayIterator(answers.toArray(new TermFreq[answers.size()]))); + suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()]))); final int ITERS = atLeast(100); for(int iter=0;iter slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { + private List slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) { final List results = new ArrayList(); final int fragLen = frag.length(); - for(TermFreq tf : answers) { + for(TermFreqPayload tf : answers) { //System.out.println(" check s=" + tf.term.utf8ToString()); boolean prefixMatches = true; for(int i=0;i keys = shuffle( - new TermFreq("foo bar baz blah", 50), - new TermFreq("boo foo bar foo bee", 20) + Iterable keys = shuffle( + new TermFreqPayload("foo bar baz blah", 50), + new TermFreqPayload("boo foo bar foo bee", 20) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); for(int i=0;i<2;i++) { @@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringBuild() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreq("foo\u001ebar baz", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo\u001ebar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); try { - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); fail("did not hit expected exception"); } catch (IllegalArgumentException iae) { // expected @@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { public void testIllegalByteDuringQuery() throws Exception { // Default separator is INFORMATION SEPARATOR TWO // (0x1e), so no input token is allowed to contain it - Iterable keys = shuffle( - new TermFreq("foo bar baz", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar baz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); try { sug.lookup("foo\u001eb", 10); @@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Skip header: lfd.nextDoc(); FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random())); - sug.build(new TermFreqIterator() { + sug.build(new TermFreqPayloadIterator() { private int count; @@ -161,6 +161,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { } return new BytesRef(doc.get("body")); } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } }); if (VERBOSE) { System.out.println(sug.sizeInBytes() + " bytes"); @@ -175,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase { 
// Make sure you can suggest based only on unigram model: public void testUnigrams() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar baz blah boo foo bar foo bee", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar baz blah boo foo bar foo bee", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); // Sorts first by count, descending, second by term, ascending assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11", toString(sug.lookup("b", 10))); @@ -189,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Make sure the last token is not duplicated public void testNoDupsAcrossGrams() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("foo bar/1.00", toString(sug.lookup("foo b", 10))); } // Lookup of just empty string produces unicode only matches: public void testEmptyString() throws Exception { - Iterable keys = shuffle( - new TermFreq("foo bar bar bar bar", 50) + Iterable keys = shuffle( + new TermFreqPayload("foo bar bar bar bar", 50) ); Analyzer a = new MockAnalyzer(random()); FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); try { sug.lookup("", 10); fail("did not hit exception"); @@ -228,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreq("wizard of oz", 50) + Iterable keys = shuffle( + new TermFreqPayload("wizard of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10))); @@ -256,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase { } }; - Iterable keys = shuffle( - new TermFreq("wizard of of oz", 50) + Iterable keys = shuffle( + new TermFreqPayload("wizard of of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); - sug.build(new TermFreqArrayIterator(keys)); + sug.build(new TermFreqPayloadArrayIterator(keys)); assertEquals("", toString(sug.lookup("wizard of of", 10))); } @@ -320,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase { // Build suggester model: FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20); - sug.build(new TermFreqIterator() { + sug.build(new TermFreqPayloadIterator() { int upto; @Override @@ -342,6 +352,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { public long weight() { return random().nextLong(); } + + @Override + public BytesRef payload() { + return null; + } + + @Override + public boolean hasPayloads() { + return false; + } }); // Build inefficient but hopefully correct model: diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java index ff835bd..42594e7 100644 --- 
lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java @@ -28,8 +28,8 @@ import org.apache.lucene.util.*; * Unit tests for {@link FSTCompletion}. */ public class FSTCompletionTest extends LuceneTestCase { - public static TermFreq tf(String t, int v) { - return new TermFreq(t, v); + public static TermFreqPayload tf(String t, int v) { + return new TermFreqPayload(t, v); } private FSTCompletion completion; @@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase { super.setUp(); FSTCompletionBuilder builder = new FSTCompletionBuilder(); - for (TermFreq tf : evalKeys()) { + for (TermFreqPayload tf : evalKeys()) { builder.add(tf.term, (int) tf.v); } completion = builder.build(); completionAlphabetical = new FSTCompletion(completion.getFST(), false, true); } - private TermFreq[] evalKeys() { - final TermFreq[] keys = new TermFreq[] { + private TermFreqPayload[] evalKeys() { + final TermFreqPayload[] keys = new TermFreqPayload[] { tf("one", 0), tf("oneness", 1), tf("onerous", 1), @@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase { FSTCompletionLookup lookup = new FSTCompletionLookup(10, true); Random r = random(); - List keys = new ArrayList(); + List keys = new ArrayList(); for (int i = 0; i < 5000; i++) { - keys.add(new TermFreq(_TestUtil.randomSimpleString(r), -1)); + keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1)); } - lookup.build(new TermFreqArrayIterator(keys)); + lookup.build(new TermFreqPayloadArrayIterator(keys)); // All the weights were constant, so all returned buckets must be constant, whatever they // are. Long previous = null; - for (TermFreq tf : keys) { + for (TermFreqPayload tf : keys) { Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue(); if (previous != null) { assertEquals(previous, current); @@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testMultilingualInput() throws Exception { - List input = LookupBenchmarkTest.readTop50KWiki(); + List input = LookupBenchmarkTest.readTop50KWiki(); FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqArrayIterator(input)); - for (TermFreq tf : input) { + lookup.build(new TermFreqPayloadArrayIterator(input)); + for (TermFreqPayload tf : input) { assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))); assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString()); } @@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase { } public void testRandom() throws Exception { - List freqs = new ArrayList(); + List freqs = new ArrayList(); Random rnd = random(); for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) { int weight = rnd.nextInt(100); - freqs.add(new TermFreq("" + rnd.nextLong(), weight)); + freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight)); } FSTCompletionLookup lookup = new FSTCompletionLookup(); - lookup.build(new TermFreqArrayIterator(freqs.toArray(new TermFreq[freqs.size()]))); + lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()]))); - for (TermFreq tf : freqs) { + for (TermFreqPayload tf : freqs) { final String term = tf.term.utf8ToString(); for (int i = 1; i < term.length(); i++) { String prefix = term.substring(0, i); diff --git 
lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java index 3d96ba7..2fe2c7a 100644 --- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java @@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst; import java.util.*; import org.apache.lucene.search.suggest.Lookup.LookupResult; -import org.apache.lucene.search.suggest.TermFreq; -import org.apache.lucene.search.suggest.TermFreqArrayIterator; +import org.apache.lucene.search.suggest.TermFreqPayload; +import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil; public class WFSTCompletionTest extends LuceneTestCase { public void testBasic() throws Exception { - TermFreq keys[] = new TermFreq[] { - new TermFreq("foo", 50), - new TermFreq("bar", 10), - new TermFreq("barbar", 12), - new TermFreq("barbara", 6) + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("foo", 50), + new TermFreqPayload("bar", 10), + new TermFreqPayload("barbar", 12), + new TermFreqPayload("barbara", 6) }; Random random = new Random(random().nextLong()); WFSTCompletionLookup suggester = new WFSTCompletionLookup(); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); // top N of 2, but only foo is available List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2); @@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(true); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 20), - new TermFreq("x", 2), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 20), + new TermFreqPayload("x", 2), })); for(int topN=1;topN<4;topN++) { @@ -105,9 +105,9 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq("x y", 20), - new TermFreq("x", 2), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload("x y", 20), + new TermFreqPayload("x", 2), })); for(int topN=1;topN<4;topN++) { @@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase { final TreeMap slowCompletor = new TreeMap(); final TreeSet allPrefixes = new TreeSet(); - TermFreq[] keys = new TermFreq[numWords]; + TermFreqPayload[] keys = new TermFreqPayload[numWords]; for (int i = 0; i < numWords; i++) { String s; @@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase { // we can probably do Integer.MAX_VALUE here, but why worry. 
int weight = random().nextInt(1<<24); slowCompletor.put(s, (long)weight); - keys[i] = new TermFreq(s, weight); + keys[i] = new TermFreqPayload(s, weight); } WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(keys)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); Random random = new Random(random().nextLong()); for (String prefix : allPrefixes) { @@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[] { - new TermFreq(key1, 50), - new TermFreq(key2, 50), + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] { + new TermFreqPayload(key1, 50), + new TermFreqPayload(key2, 50), })); } public void testEmpty() throws Exception { WFSTCompletionLookup suggester = new WFSTCompletionLookup(false); - suggester.build(new TermFreqArrayIterator(new TermFreq[0])); + suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0])); List result = suggester.lookup("a", false, 20); assertTrue(result.isEmpty()); }
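For orientation, a minimal sketch (not part of the patch) of how the consolidated iterator is consumed after this change: it builds an AnalyzingSuggester from the TermFreqPayload / TermFreqPayloadArrayIterator test helpers shown above, once with per-entry payloads and once weight-only, mirroring the build() and lookup() calls in AnalyzingSuggesterTest. The KeywordAnalyzer and the wrapping class with a main method are illustrative assumptions, not part of the patch.

import java.util.List;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.TermFreqPayload;
import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.util.BytesRef;

public class TermFreqPayloadUsageSketch {
  public static void main(String[] args) throws Exception {
    // Entries carrying an arbitrary per-suggestion payload
    // (TermFreqPayload(String, long, BytesRef) marks hasPayloads as true).
    TermFreqPayload[] withPayloads = new TermFreqPayload[] {
      new TermFreqPayload("foo", 50, new BytesRef("doc-1")),
      new TermFreqPayload("bar", 10, new BytesRef("doc-2")),
    };

    // Weight-only entries: payload() returns null and hasPayloads() is false.
    TermFreqPayload[] weightOnly = new TermFreqPayload[] {
      new TermFreqPayload("foo", 50),
      new TermFreqPayload("bar", 10),
    };

    // Analyzer choice is illustrative; the tests above use MockAnalyzer.
    AnalyzingSuggester suggester = new AnalyzingSuggester(new KeywordAnalyzer());

    // Both arrays go through the same iterator type; no separate
    // TermFreqArrayIterator is needed any more.
    suggester.build(new TermFreqPayloadArrayIterator(withPayloads));
    List<LookupResult> results = suggester.lookup("f", false, 2);
    for (LookupResult result : results) {
      System.out.println(result.key + "/" + result.value + " payload=" + result.payload);
    }

    // Rebuilding from weight-only input is identical; payloads are simply null.
    suggester.build(new TermFreqPayloadArrayIterator(weightOnly));
  }
}

Either way build() consumes a single TermFreqPayloadIterator, and hasPayloads() lets the suggester decide up front whether payload bytes need to be stored at all.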