diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
index 5882fdf..826ba28 100644
--- lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
+++ lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
@@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.BytesRef;
@@ -59,7 +60,7 @@ public class HighFrequencyDictionary implements Dictionary {
return new HighFrequencyIterator();
}
- final class HighFrequencyIterator implements TermFreqPayloadIterator {
+ final class HighFrequencyIterator implements InputIterator {
private final BytesRef spare = new BytesRef();
private final TermsEnum termsEnum;
private int minNumDocs;
diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java
deleted file mode 100644
index e780db4..0000000
--- lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package org.apache.lucene.search.spell;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
-import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
-import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs
-import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
-
-/**
- * Interface for enumerating term,weight,payload triples;
- * currently only {@link AnalyzingSuggester}, {@link
- * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads.
- */
-public interface TermFreqPayloadIterator extends BytesRefIterator {
-
- /** A term's weight, higher numbers mean better suggestions. */
- public long weight();
-
- /** An arbitrary byte[] to record per suggestion. See
- * {@link LookupResult#payload} to retrieve the payload
- * for each suggestion. */
- public BytesRef payload();
-
- /** Returns true if the iterator has payloads */
- public boolean hasPayloads();
-
- /**
- * Wraps a BytesRefIterator as a TermFreqPayloadIterator, with all weights
- * set to 1 and carries no payload
- */
- public static class TermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
- private final BytesRefIterator wrapped;
-
- /**
- * Creates a new wrapper, wrapping the specified iterator and
- * specifying a weight value of 1 for all terms
- * and nullifies associated payloads.
- */
- public TermFreqPayloadIteratorWrapper(BytesRefIterator wrapped) {
- this.wrapped = wrapped;
- }
-
- @Override
- public long weight() {
- return 1;
- }
-
- @Override
- public BytesRef next() throws IOException {
- return wrapped.next();
- }
-
- @Override
- public BytesRef payload() {
- return null;
- }
-
- @Override
- public boolean hasPayloads() {
- return false;
- }
- }
-}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java
new file mode 100644
index 0000000..b9772fa
--- /dev/null
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java
@@ -0,0 +1,88 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Counter;
+
+/**
+ * Eagerly drains another {@link InputIterator} at construction time and replays the buffered terms, weights and payloads in their original order.
+ * @lucene.experimental
+ */
+public class BufferedInputIterator implements InputIterator {
+ // TODO keep this for now
+ /** buffered term entries, in the order the source returned them */
+ protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
+ /** buffered payload entries; only appended to when the source reports payloads */
+ protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
+ /** current buffer position; starts at -1 and is advanced by every call to {@link #next()} */
+ protected int curPos = -1;
+ /** buffered weights, parallel with {@link #entries} */
+ protected long[] freqs = new long[1];
+ private final BytesRef spare = new BytesRef(); // the single instance handed back by next()
+ private final BytesRef payloadSpare = new BytesRef(); // the single instance passed to payloads.get()
+ private final boolean hasPayloads;
+
+ /** Creates a new iterator, buffering every entry (term, weight and, if the source has payloads, payload) from the specified iterator */
+ public BufferedInputIterator(InputIterator source) throws IOException {
+ BytesRef spare; // local variable; shadows the spare field inside this constructor
+ int freqIndex = 0;
+ hasPayloads = source.hasPayloads();
+ while((spare = source.next()) != null) {
+ entries.append(spare);
+ if (hasPayloads) {
+ payloads.append(source.payload());
+ }
+ if (freqIndex >= freqs.length) {
+ freqs = ArrayUtil.grow(freqs, freqs.length+1);
+ }
+ freqs[freqIndex++] = source.weight(); // weight is read after next(), for the term just returned
+ }
+
+ }
+ /** Returns freqs[curPos]; curPos is -1 until next() has been called, so call this only after next() returned a term. */
+ @Override
+ public long weight() {
+ return freqs[curPos];
+ }
+ /** Advances curPos and returns the buffered term at that position in the shared spare instance, or null once past the last entry. */
+ @Override
+ public BytesRef next() throws IOException {
+ if (++curPos < entries.size()) {
+ entries.get(spare, curPos);
+ return spare;
+ }
+ return null;
+ }
+ /** Returns the buffered payload at curPos, or null when the source had no payloads or curPos is beyond the buffered payloads. */
+ @Override
+ public BytesRef payload() {
+ if (hasPayloads && curPos < payloads.size()) {
+ return payloads.get(payloadSpare, curPos);
+ }
+ return null;
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return hasPayloads; // captured from the source in the constructor
+ }
+}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java
deleted file mode 100644
index b78ec0e..0000000
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqPayloadIteratorWrapper.java
+++ /dev/null
@@ -1,89 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Counter;
-
-/**
- * This wrapper buffers incoming elements.
- * @lucene.experimental
- */
-public class BufferingTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
- // TODO keep this for now
- /** buffered term entries */
- protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
- /** buffered payload entries */
- protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
- /** current buffer position */
- protected int curPos = -1;
- /** buffered weights, parallel with {@link #entries} */
- protected long[] freqs = new long[1];
- private final BytesRef spare = new BytesRef();
- private final BytesRef payloadSpare = new BytesRef();
- private final boolean hasPayloads;
-
- /** Creates a new iterator, buffering entries from the specified iterator */
- public BufferingTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
- BytesRef spare;
- int freqIndex = 0;
- hasPayloads = source.hasPayloads();
- while((spare = source.next()) != null) {
- entries.append(spare);
- if (hasPayloads) {
- payloads.append(source.payload());
- }
- if (freqIndex >= freqs.length) {
- freqs = ArrayUtil.grow(freqs, freqs.length+1);
- }
- freqs[freqIndex++] = source.weight();
- }
-
- }
-
- @Override
- public long weight() {
- return freqs[curPos];
- }
-
- @Override
- public BytesRef next() throws IOException {
- if (++curPos < entries.size()) {
- entries.get(spare, curPos);
- return spare;
- }
- return null;
- }
-
- @Override
- public BytesRef payload() {
- if (hasPayloads && curPos < payloads.size()) {
- return payloads.get(payloadSpare, curPos);
- }
- return null;
- }
-
- @Override
- public boolean hasPayloads() {
- return hasPayloads;
- }
-}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
index 425d9db..46c61eb 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
@@ -27,12 +27,6 @@ import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadoc
-import org.apache.lucene.search.suggest.fst.FSTCompletionLookup; // javadoc
-import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup; // javadoc
-import org.apache.lucene.search.suggest.jaspell.JaspellLookup; // javadoc
-import org.apache.lucene.search.suggest.tst.TSTLookup; // javadoc
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
@@ -83,10 +77,10 @@ public class DocumentDictionary implements Dictionary {
@Override
public BytesRefIterator getWordsIterator() throws IOException {
- return new TermWeightPayloadIterator(payloadField!=null);
+ return new DocumentInputIterator(payloadField!=null);
}
- final class TermWeightPayloadIterator implements TermFreqPayloadIterator {
+ final class DocumentInputIterator implements InputIterator {
private final int docCount;
private final Set relevantFields;
private final boolean hasPayloads;
@@ -100,7 +94,7 @@ public class DocumentDictionary implements Dictionary {
* index. setting withPayload to false, implies an iterator
* over only term and weight.
*/
- public TermWeightPayloadIterator(boolean hasPayloads) throws IOException {
+ public DocumentInputIterator(boolean hasPayloads) throws IOException {
docCount = reader.maxDoc() - 1;
this.hasPayloads = hasPayloads;
currentPayload = null;
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
index b03033b..16318b3 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
@@ -21,7 +21,6 @@ package org.apache.lucene.search.suggest;
import java.io.*;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
@@ -57,11 +56,11 @@ public class FileDictionary implements Dictionary {
}
@Override
- public TermFreqPayloadIterator getWordsIterator() {
+ public InputIterator getWordsIterator() {
return new FileIterator();
}
- final class FileIterator implements TermFreqPayloadIterator {
+ final class FileIterator implements InputIterator {
private long curFreq;
private final BytesRef spare = new BytesRef();
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
new file mode 100644
index 0000000..bda1332
--- /dev/null
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
@@ -0,0 +1,83 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
+import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs
+import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
+
+/**
+ * Interface for enumerating (term, weight, payload) triples for suggester consumption;
+ * terms come from {@link BytesRefIterator#next()}; currently only {@link AnalyzingSuggester}, {@link
+ * FuzzySuggester} and {@link AnalyzingInfixSuggester} support payloads.
+ */
+public interface InputIterator extends BytesRefIterator {
+
+ /** A term's weight, higher numbers mean better suggestions. */
+ public long weight();
+
+ /** An arbitrary byte[] to record per suggestion. See
+ * {@link LookupResult#payload} to retrieve the payload
+ * for each suggestion. */
+ public BytesRef payload();
+
+ /** Returns true if the iterator has payloads */
+ public boolean hasPayloads();
+
+ /**
+ * Wraps a plain BytesRefIterator as a suggester InputIterator: every term
+ * gets a weight of 1 and no payload is carried
+ */
+ public static class InputIteratorWrapper implements InputIterator {
+ private final BytesRefIterator wrapped;
+
+ /**
+ * Creates a new wrapper, wrapping the specified iterator and
+ * specifying a weight value of 1 for all terms
+ * and nullifies associated payloads.
+ */
+ public InputIteratorWrapper(BytesRefIterator wrapped) {
+ this.wrapped = wrapped;
+ }
+
+ @Override
+ public long weight() {
+ return 1; // constant: the wrapped iterator is only ever asked for terms
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+ return wrapped.next(); // pure delegation; null from the wrapped iterator is passed through
+ }
+
+ @Override
+ public BytesRef payload() {
+ return null; // this wrapper never carries payloads
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return false; // consistent with payload() always returning null
+ }
+ }
+}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
index edee62b..dd35d85 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
@@ -24,7 +24,6 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.PriorityQueue;
@@ -154,25 +153,25 @@ public abstract class Lookup {
/** Build lookup from a dictionary. Some implementations may require sorted
* or unsorted keys from the dictionary's iterator - use
- * {@link SortedTermFreqPayloadIteratorWrapper} or
- * {@link UnsortedTermFreqPayloadIteratorWrapper} in such case.
+ * {@link SortedInputIterator} or
+ * {@link UnsortedInputIterator} in such case.
*/
public void build(Dictionary dict) throws IOException {
BytesRefIterator it = dict.getWordsIterator();
- TermFreqPayloadIterator tfit;
- if (it instanceof TermFreqPayloadIterator) {
- tfit = (TermFreqPayloadIterator)it;
+ InputIterator tfit;
+ if (it instanceof InputIterator) {
+ tfit = (InputIterator)it;
} else {
- tfit = new TermFreqPayloadIterator.TermFreqPayloadIteratorWrapper(it);
+ tfit = new InputIterator.InputIteratorWrapper(it);
}
build(tfit);
}
/**
- * Builds up a new internal {@link Lookup} representation based on the given {@link TermFreqPayloadIterator}.
+ * Builds up a new internal {@link Lookup} representation based on the given {@link InputIterator}.
* The implementation might re-sort the data internally.
*/
- public abstract void build(TermFreqPayloadIterator tfit) throws IOException;
+ public abstract void build(InputIterator tfit) throws IOException;
/**
* Look up a key and return possible completion for this key.
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
new file mode 100644
index 0000000..d804f38
--- /dev/null
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
@@ -0,0 +1,226 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
+import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Drains the wrapped {@link InputIterator} into a temporary file, sorts the entries with the given comparator (ties broken by weight) and replays them in sorted order.
+ * @lucene.experimental
+ */
+public class SortedInputIterator implements InputIterator {
+
+  private final InputIterator source;
+  private File tempInput;
+  private File tempSorted;
+  private final ByteSequencesReader reader;
+  private final Comparator<BytesRef> comparator;
+  private final boolean hasPayloads;
+  private boolean done = false;
+
+  private long weight;
+  private final BytesRef scratch = new BytesRef();
+  private BytesRef payload = new BytesRef();
+
+  /**
+   * Creates a new sorted wrapper, using {@link
+   * BytesRef#getUTF8SortedAsUnicodeComparator} for
+   * sorting. */
+  public SortedInputIterator(InputIterator source) throws IOException {
+    this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
+  }
+
+  /**
+   * Creates a new sorted wrapper that drains {@code source} immediately, sorting by
+   * {@code comparator} (ascending) then cost (ascending).
+   */
+  public SortedInputIterator(InputIterator source, Comparator<BytesRef> comparator) throws IOException {
+    this.hasPayloads = source.hasPayloads();
+    this.source = source;
+    this.comparator = comparator;
+    this.reader = sort();
+  }
+  /** Returns the next term in sorted order (the shared scratch instance) and updates weight/payload; returns null and deletes the temp files once exhausted. */
+  @Override
+  public BytesRef next() throws IOException {
+    boolean success = false;
+    if (done) {
+      return null;
+    }
+    try {
+      ByteArrayDataInput input = new ByteArrayDataInput();
+      if (reader.read(scratch)) {
+        weight = decode(scratch, input); // strips the trailing weight from scratch
+        if (hasPayloads) {
+          payload = decodePayload(scratch, input); // strips the trailing payload, leaving only the term
+        }
+        success = true;
+        return scratch;
+      }
+      close();
+      success = done = true;
+      return null;
+    } finally {
+      if (!success) { // a read or decode failure: mark exhausted and release the temp files
+        done = true;
+        close();
+      }
+    }
+  }
+
+  @Override
+  public long weight() {
+    return weight;
+  }
+
+  @Override
+  public BytesRef payload() {
+    if (hasPayloads) {
+      return payload;
+    }
+    return null;
+  }
+
+  @Override
+  public boolean hasPayloads() {
+    return hasPayloads;
+  }
+
+  /** Sorts encoded entries by their term bytes using {@code comparator} (ascending), then by cost (ascending). */
+  private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() {
+
+    private final BytesRef leftScratch = new BytesRef();
+    private final BytesRef rightScratch = new BytesRef();
+    private final ByteArrayDataInput input = new ByteArrayDataInput();
+
+    @Override
+    public int compare(BytesRef left, BytesRef right) {
+      // Make shallow copy in case decode changes the BytesRef:
+      leftScratch.bytes = left.bytes;
+      leftScratch.offset = left.offset;
+      leftScratch.length = left.length;
+      rightScratch.bytes = right.bytes;
+      rightScratch.offset = right.offset;
+      rightScratch.length = right.length;
+      long leftCost = decode(leftScratch, input);
+      long rightCost = decode(rightScratch, input);
+      if (hasPayloads) {
+        decodePayload(leftScratch, input); // result unused: called only to trim the payload off the scratch
+        decodePayload(rightScratch, input);
+      }
+      int cmp = comparator.compare(leftScratch, rightScratch);
+      if (cmp != 0) {
+        return cmp;
+      }
+      return Long.compare(leftCost, rightCost);
+    }
+  };
+  /** Writes every source entry to tempInput, sorts it into tempSorted and returns a reader over the sorted file; on failure the temp files are deleted. */
+  private Sort.ByteSequencesReader sort() throws IOException {
+    String prefix = getClass().getSimpleName();
+    File directory = Sort.defaultTempDir();
+    tempInput = File.createTempFile(prefix, ".input", directory);
+    tempSorted = File.createTempFile(prefix, ".sorted", directory);
+
+    final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
+    boolean success = false;
+    try {
+      BytesRef spare;
+      byte[] buffer = new byte[0];
+      ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
+
+      while ((spare = source.next()) != null) {
+        encode(writer, output, buffer, spare, source.payload(), source.weight());
+      }
+      writer.close();
+      new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted);
+      ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
+      success = true;
+      return reader;
+
+    } finally {
+      if (success) {
+        IOUtils.close(writer);
+      } else {
+        try {
+          IOUtils.closeWhileHandlingException(writer);
+        } finally {
+          close(); // the reader field is still unassigned here; this mainly removes the temp files
+        }
+      }
+    }
+  }
+  /** Closes the sorted-file reader and deletes both temporary files (if they were created). */
+  private void close() throws IOException {
+    IOUtils.close(reader);
+    if (tempInput != null) {
+      tempInput.delete();
+    }
+    if (tempSorted != null) {
+      tempSorted.delete();
+    }
+  }
+
+  /** Encodes one entry as term bytes, then (with payloads) payload bytes plus a 2-byte payload length, then the 8-byte weight, and writes it to the writer; payload lengths above {@link Short#MAX_VALUE} do not fit the 2-byte field. */
+  protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
+    int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0);
+    if (requiredLength >= buffer.length) {
+      buffer = ArrayUtil.grow(buffer, requiredLength); // rebinds the local parameter only; the caller's array is not replaced
+    }
+    output.reset(buffer);
+    output.writeBytes(spare.bytes, spare.offset, spare.length);
+    if (hasPayloads) {
+      output.writeBytes(payload.bytes, payload.offset, payload.length);
+      output.writeShort((short) payload.length);
+    }
+    output.writeLong(weight);
+    writer.write(buffer, 0, output.getPosition());
+  }
+
+  /** Decodes the trailing 8-byte weight of the entry in {@code scratch} (honoring {@code scratch.offset}) and trims it from {@code scratch.length}. */
+  protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
+    tmpInput.reset(scratch.bytes, scratch.offset, scratch.length); // start reading at the entry's offset, not at index 0
+    tmpInput.skipBytes(scratch.length - 8); // suggestion
+    scratch.length -= 8; // long
+    return tmpInput.readLong();
+  }
+
+  /** Decodes the trailing payload (bytes followed by a 2-byte length) of the entry in {@code scratch}, honoring {@code scratch.offset}; returns a fresh copy and trims both from {@code scratch.length}. */
+  protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
+    tmpInput.reset(scratch.bytes, scratch.offset, scratch.length); // start reading at the entry's offset, not at index 0
+    tmpInput.skipBytes(scratch.length - 2); // skip to payload size
+    short payloadLength = tmpInput.readShort(); // read payload size
+    tmpInput.setPosition(scratch.offset + scratch.length - 2 - payloadLength); // setPosition (absolute) to start of payload
+    BytesRef payloadScratch = new BytesRef(payloadLength);
+    tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
+    payloadScratch.length = payloadLength;
+    scratch.length -= 2; // payload length info (short)
+    scratch.length -= payloadLength; // payload
+    return payloadScratch;
+  }
+}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java
deleted file mode 100644
index b8fa103..0000000
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqPayloadIteratorWrapper.java
+++ /dev/null
@@ -1,227 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Comparator;
-
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.search.suggest.Sort.ByteSequencesReader;
-import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
-import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.ByteArrayDataOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * This wrapper buffers incoming elements and makes sure they are sorted based on given comparator.
- * @lucene.experimental
- */
-public class SortedTermFreqPayloadIteratorWrapper implements TermFreqPayloadIterator {
-
- private final TermFreqPayloadIterator source;
- private File tempInput;
- private File tempSorted;
- private final ByteSequencesReader reader;
- private final Comparator comparator;
- private final boolean hasPayloads;
- private boolean done = false;
-
- private long weight;
- private final BytesRef scratch = new BytesRef();
- private BytesRef payload = new BytesRef();
-
- /**
- * Creates a new sorted wrapper, using {@link
- * BytesRef#getUTF8SortedAsUnicodeComparator} for
- * sorting. */
- public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
- this(source, BytesRef.getUTF8SortedAsUnicodeComparator());
- }
-
- /**
- * Creates a new sorted wrapper, sorting by BytesRef
- * (ascending) then cost (ascending).
- */
- public SortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source, Comparator comparator) throws IOException {
- this.hasPayloads = source.hasPayloads();
- this.source = source;
- this.comparator = comparator;
- this.reader = sort();
- }
-
- @Override
- public BytesRef next() throws IOException {
- boolean success = false;
- if (done) {
- return null;
- }
- try {
- ByteArrayDataInput input = new ByteArrayDataInput();
- if (reader.read(scratch)) {
- weight = decode(scratch, input);
- if (hasPayloads) {
- payload = decodePayload(scratch, input);
- }
- success = true;
- return scratch;
- }
- close();
- success = done = true;
- return null;
- } finally {
- if (!success) {
- done = true;
- close();
- }
- }
- }
-
- @Override
- public long weight() {
- return weight;
- }
-
- @Override
- public BytesRef payload() {
- if (hasPayloads) {
- return payload;
- }
- return null;
- }
-
- @Override
- public boolean hasPayloads() {
- return hasPayloads;
- }
-
- /** Sortes by BytesRef (ascending) then cost (ascending). */
- private final Comparator tieBreakByCostComparator = new Comparator() {
-
- private final BytesRef leftScratch = new BytesRef();
- private final BytesRef rightScratch = new BytesRef();
- private final ByteArrayDataInput input = new ByteArrayDataInput();
-
- @Override
- public int compare(BytesRef left, BytesRef right) {
- // Make shallow copy in case decode changes the BytesRef:
- leftScratch.bytes = left.bytes;
- leftScratch.offset = left.offset;
- leftScratch.length = left.length;
- rightScratch.bytes = right.bytes;
- rightScratch.offset = right.offset;
- rightScratch.length = right.length;
- long leftCost = decode(leftScratch, input);
- long rightCost = decode(rightScratch, input);
- if (hasPayloads) {
- decodePayload(leftScratch, input);
- decodePayload(rightScratch, input);
- }
- int cmp = comparator.compare(leftScratch, rightScratch);
- if (cmp != 0) {
- return cmp;
- }
- return Long.compare(leftCost, rightCost);
- }
- };
-
- private Sort.ByteSequencesReader sort() throws IOException {
- String prefix = getClass().getSimpleName();
- File directory = Sort.defaultTempDir();
- tempInput = File.createTempFile(prefix, ".input", directory);
- tempSorted = File.createTempFile(prefix, ".sorted", directory);
-
- final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
- boolean success = false;
- try {
- BytesRef spare;
- byte[] buffer = new byte[0];
- ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
-
- while ((spare = source.next()) != null) {
- encode(writer, output, buffer, spare, source.payload(), source.weight());
- }
- writer.close();
- new Sort(tieBreakByCostComparator).sort(tempInput, tempSorted);
- ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
- success = true;
- return reader;
-
- } finally {
- if (success) {
- IOUtils.close(writer);
- } else {
- try {
- IOUtils.closeWhileHandlingException(writer);
- } finally {
- close();
- }
- }
- }
- }
-
- private void close() throws IOException {
- IOUtils.close(reader);
- if (tempInput != null) {
- tempInput.delete();
- }
- if (tempSorted != null) {
- tempSorted.delete();
- }
- }
-
- /** encodes an entry (bytes+(payload)+weight) to the provided writer */
- protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
- int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0);
- if (requiredLength >= buffer.length) {
- buffer = ArrayUtil.grow(buffer, requiredLength);
- }
- output.reset(buffer);
- output.writeBytes(spare.bytes, spare.offset, spare.length);
- if (hasPayloads) {
- output.writeBytes(payload.bytes, payload.offset, payload.length);
- output.writeShort((short) payload.length);
- }
- output.writeLong(weight);
- writer.write(buffer, 0, output.getPosition());
- }
-
- /** decodes the weight at the current position */
- protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
- tmpInput.reset(scratch.bytes);
- tmpInput.skipBytes(scratch.length - 8); // suggestion
- scratch.length -= 8; // long
- return tmpInput.readLong();
- }
-
- /** decodes the payload at the current position */
- protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
- tmpInput.reset(scratch.bytes);
- tmpInput.skipBytes(scratch.length - 2); // skip to payload size
- short payloadLength = tmpInput.readShort(); // read payload size
- tmpInput.setPosition(scratch.length - 2 - payloadLength); // setPosition to start of payload
- BytesRef payloadScratch = new BytesRef(payloadLength);
- tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
- payloadScratch.length = payloadLength;
- scratch.length -= 2; // payload length info (short)
- scratch.length -= payloadLength; // payload
- return payloadScratch;
- }
-}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java
new file mode 100644
index 0000000..4403fc1
--- /dev/null
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java
@@ -0,0 +1,78 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This wrapper buffers the incoming elements and makes sure they are in
+ * random order.
+ * @lucene.experimental
+ */
+public class UnsortedInputIterator extends BufferedInputIterator {
+  // TODO keep this for now
+  /** Permutation of the buffered entry ordinals; slot {@code k} names the entry returned k-th. */
+  private final int[] ords;
+  /** Ordinal of the entry most recently returned by {@link #next()}; -1 before the first call. */
+  private int currentOrd = -1;
+  private final BytesRef spare = new BytesRef();
+  private final BytesRef payloadSpare = new BytesRef();
+  /**
+   * Creates a new iterator, wrapping the specified iterator and
+   * returning elements in a random order.
+   */
+  public UnsortedInputIterator(InputIterator source) throws IOException {
+    super(source);
+    ords = new int[entries.size()];
+    Random random = new Random();
+    // Inside-out Fisher-Yates shuffle: drawing from [0, i] instead of the whole array
+    // makes every permutation equally likely (the naive full-range swap is biased).
+    for (int i = 0; i < ords.length; i++) {
+      int randomPosition = random.nextInt(i + 1);
+      ords[i] = ords[randomPosition];
+      ords[randomPosition] = i;
+    }
+  }
+
+  @Override
+  public long weight() {
+    assert currentOrd == ords[curPos];
+    return freqs[currentOrd];
+  }
+
+  @Override
+  public BytesRef next() throws IOException {
+    if (++curPos < entries.size()) {
+      currentOrd = ords[curPos];
+      return entries.get(spare, currentOrd);
+    }
+    return null;
+  }
+
+  @Override
+  public BytesRef payload() {
+    if (hasPayloads() && curPos < payloads.size()) {
+      assert currentOrd == ords[curPos];
+      return payloads.get(payloadSpare, currentOrd);
+    }
+    return null;
+  }
+}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java
deleted file mode 100644
index 8aad73b..0000000
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqPayloadIteratorWrapper.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Random;
-
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * This wrapper buffers the incoming elements and makes sure they are in
- * random order.
- * @lucene.experimental
- */
-public class UnsortedTermFreqPayloadIteratorWrapper extends BufferingTermFreqPayloadIteratorWrapper {
- // TODO keep this for now
- private final int[] ords;
- private int currentOrd = -1;
- private final BytesRef spare = new BytesRef();
- private final BytesRef payloadSpare = new BytesRef();
- /**
- * Creates a new iterator, wrapping the specified iterator and
- * returning elements in a random order.
- */
- public UnsortedTermFreqPayloadIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
- super(source);
- ords = new int[entries.size()];
- Random random = new Random();
- for (int i = 0; i < ords.length; i++) {
- ords[i] = i;
- }
- for (int i = 0; i < ords.length; i++) {
- int randomPosition = random.nextInt(ords.length);
- int temp = ords[i];
- ords[i] = ords[randomPosition];
- ords[randomPosition] = temp;
- }
- }
-
- @Override
- public long weight() {
- assert currentOrd == ords[curPos];
- return freqs[currentOrd];
- }
-
- @Override
- public BytesRef next() throws IOException {
- if (++curPos < entries.size()) {
- currentOrd = ords[curPos];
- return entries.get(spare, currentOrd);
- }
- return null;
- }
-
- @Override
- public BytesRef payload() {
- if (hasPayloads() && curPos < payloads.size()) {
- assert currentOrd == ords[curPos];
- return payloads.get(payloadSpare, currentOrd);
- }
- return null;
- }
-}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 27d73b8..7d388aa 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -65,8 +65,8 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -175,7 +175,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
}
@Override
- public void build(TermFreqPayloadIterator iter) throws IOException {
+ public void build(InputIterator iter) throws IOException {
if (searcher != null) {
searcher.getIndexReader().close();
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
index 0b6ff71..ee681c7 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
@@ -31,7 +31,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
import org.apache.lucene.store.ByteArrayDataInput;
@@ -380,7 +380,7 @@ public class AnalyzingSuggester extends Lookup {
}
@Override
- public void build(TermFreqPayloadIterator iterator) throws IOException {
+ public void build(InputIterator iterator) throws IOException {
String prefix = getClass().getSimpleName();
File directory = Sort.defaultTempDir();
File tempInput = File.createTempFile(prefix, ".input", directory);
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
index cee929b..36c22a7 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
@@ -54,7 +54,7 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
import org.apache.lucene.store.ByteArrayDataInput;
@@ -273,14 +273,14 @@ public class FreeTextSuggester extends Lookup {
}
@Override
- public void build(TermFreqPayloadIterator iterator) throws IOException {
+ public void build(InputIterator iterator) throws IOException {
build(iterator, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
}
/** Build the suggest index, using up to the specified
* amount of temporary RAM while building. Note that
* the weights for the suggestions are ignored. */
- public void build(TermFreqPayloadIterator iterator, double ramBufferSizeMB) throws IOException {
+ public void build(InputIterator iterator, double ramBufferSizeMB) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("payloads are not supported");
}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
index 2bc0aec..3dbf66d 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
@@ -24,7 +24,7 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.SortInfo;
import org.apache.lucene.search.suggest.Sort;
@@ -42,7 +42,7 @@ import org.apache.lucene.util.fst.NoOutputs;
* An adapter from {@link Lookup} API to {@link FSTCompletion}.
*
*
This adapter differs from {@link FSTCompletion} in that it attempts
- * to discretize any "weights" as passed from in {@link TermFreqPayloadIterator#weight()}
+ * to discretize any "weights" as passed from in {@link InputIterator#weight()}
* to match the number of buckets. For the rationale for bucketing, see
* {@link FSTCompletion}.
*
@@ -95,7 +95,7 @@ public class FSTCompletionLookup extends Lookup {
/**
* This constructor prepares for creating a suggested FST using the
- * {@link #build(TermFreqPayloadIterator)} method. The number of weight
+ * {@link #build(InputIterator)} method. The number of weight
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
* exact matches are promoted to the top of the suggestions list.
*/
@@ -105,7 +105,7 @@ public class FSTCompletionLookup extends Lookup {
/**
* This constructor prepares for creating a suggested FST using the
- * {@link #build(TermFreqPayloadIterator)} method.
+ * {@link #build(InputIterator)} method.
*
* @param buckets
* The number of weight discretization buckets (see
@@ -140,7 +140,7 @@ public class FSTCompletionLookup extends Lookup {
}
@Override
- public void build(TermFreqPayloadIterator tfit) throws IOException {
+ public void build(InputIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
index 982cab5..eb07356 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
@@ -25,10 +25,10 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter;
-import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
+import org.apache.lucene.search.suggest.SortedInputIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.InputStreamDataInput;
@@ -92,12 +92,12 @@ public class WFSTCompletionLookup extends Lookup {
}
@Override
- public void build(TermFreqPayloadIterator iterator) throws IOException {
+ public void build(InputIterator iterator) throws IOException {
if (iterator.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
BytesRef scratch = new BytesRef();
- TermFreqPayloadIterator iter = new WFSTTermFreqIteratorWrapper(iterator);
+ InputIterator iter = new WFSTInputIterator(iterator);
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
@@ -254,9 +254,9 @@ public class WFSTCompletionLookup extends Lookup {
return Integer.MAX_VALUE - (int)value;
}
- private final class WFSTTermFreqIteratorWrapper extends SortedTermFreqPayloadIteratorWrapper {
+ private final class WFSTInputIterator extends SortedInputIterator {
- WFSTTermFreqIteratorWrapper(TermFreqPayloadIterator source) throws IOException {
+ WFSTInputIterator(InputIterator source) throws IOException {
super(source);
assert source.hasPayloads() == false;
}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
index 83ac512..0482e52 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
@@ -25,7 +25,7 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode;
import org.apache.lucene.util.BytesRef;
@@ -46,12 +46,12 @@ public class JaspellLookup extends Lookup {
/**
* Creates a new empty trie
- * @see #build(TermFreqPayloadIterator)
+ * @see #build(InputIterator)
* */
public JaspellLookup() {}
@Override
- public void build(TermFreqPayloadIterator tfit) throws IOException {
+ public void build(InputIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
index 6eb173c..21ed3ad 100644
--- lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
+++ lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
@@ -25,9 +25,9 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
+import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
-import org.apache.lucene.search.suggest.SortedTermFreqPayloadIteratorWrapper;
+import org.apache.lucene.search.suggest.SortedInputIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
@@ -45,19 +45,19 @@ public class TSTLookup extends Lookup {
/**
* Creates a new TSTLookup with an empty Ternary Search Tree.
- * @see #build(TermFreqPayloadIterator)
+ * @see #build(InputIterator)
*/
public TSTLookup() {}
@Override
- public void build(TermFreqPayloadIterator tfit) throws IOException {
+ public void build(InputIterator tfit) throws IOException {
if (tfit.hasPayloads()) {
throw new IllegalArgumentException("this suggester doesn't support payloads");
}
root = new TernaryTreeNode();
// make sure it's sorted and the comparator uses UTF16 sort order
- tfit = new SortedTermFreqPayloadIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
+ tfit = new SortedInputIterator(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
ArrayList tokens = new ArrayList();
ArrayList vals = new ArrayList();
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
index efdd09c..85418ff 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
@@ -19,7 +19,6 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.Dictionary;
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.DocumentDictionary;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -78,7 +77,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
- TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator();
+ InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = tfp.next())!=null) {
Document doc = docs.remove(f.utf8ToString());
@@ -105,7 +104,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
writer.close();
IndexReader ir = DirectoryReader.open(dir);
Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
- TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator();
+ InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = tfp.next())!=null) {
Document doc = docs.remove(f.utf8ToString());
@@ -153,7 +152,7 @@ public class DocumentDictionaryTest extends LuceneTestCase {
IndexReader ir = DirectoryReader.open(dir);
assertEquals(ir.numDocs(), docs.size());
Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
- TermFreqPayloadIterator tfp = (TermFreqPayloadIterator) dictionary.getWordsIterator();
+ InputIterator tfp = (InputIterator) dictionary.getWordsIterator();
BytesRef f;
while((f = tfp.next())!=null) {
Document doc = docs.remove(f.utf8ToString());
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java
new file mode 100644
index 0000000..009f80c
--- /dev/null
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java
@@ -0,0 +1,55 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+
+/** corresponds to {@link InputIterator}'s entries */
+public final class Input {
+  public final BytesRef term;        // the entry's term
+  public final long v;               // the entry's weight
+  public final BytesRef payload;     // the entry's payload; null when built without one
+  public final boolean hasPayloads;  // whether this entry was built with a payload argument
+  /** Canonical constructor: stores all four values exactly as given. */
+  public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
+    this.hasPayloads = hasPayloads;
+    this.payload = payload;
+    this.v = v;
+    this.term = term;
+  }
+  /** Entry with a payload; {@code hasPayloads} is set to {@code true}. */
+  public Input(BytesRef term, long v, BytesRef payload) {
+    this(term, v, payload, true);
+  }
+  /** Entry with a payload, the term being converted to a new {@link BytesRef}. */
+  public Input(String term, long v, BytesRef payload) {
+    this(new BytesRef(term), v, payload);
+  }
+  /** Entry without a payload; {@code payload} is null and {@code hasPayloads} is {@code false}. */
+  public Input(BytesRef term, long v) {
+    this(term, v, null, false);
+  }
+  /** Entry without a payload, the term being converted to a new {@link BytesRef}. */
+  public Input(String term, long v) {
+    this(new BytesRef(term), v);
+  }
+  /** Returns the {@code hasPayloads} flag this entry was constructed with. */
+  public boolean hasPayloads() {
+    return hasPayloads;
+  }
+}
\ No newline at end of file
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
new file mode 100644
index 0000000..edebb37
--- /dev/null
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
@@ -0,0 +1,81 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * A {@link InputIterator} over a sequence of {@link Input}s.
+ */
+public final class InputArrayIterator implements InputIterator {
+  private final Iterator<Input> i;
+  private final boolean hasPayloads;
+  private boolean first;
+  private Input current;
+  private final BytesRef spare = new BytesRef();
+  /** Pre-fetches the first entry, if any, so {@link #hasPayloads()} is fixed before iteration starts. */
+  public InputArrayIterator(Iterator<Input> i) {
+    this.i = i;
+    if (i.hasNext()) {
+      current = i.next();
+      first = true;
+      this.hasPayloads = current.hasPayloads;
+    } else {
+      this.hasPayloads = false;
+    }
+  }
+  /** Convenience constructors: iterate an array or any {@link Iterable} of entries in order. */
+  public InputArrayIterator(Input[] i) {
+    this(Arrays.asList(i));
+  }
+  public InputArrayIterator(Iterable<Input> i) {
+    this(i.iterator());
+  }
+  /** Weight of the current entry; throws NullPointerException if the source was empty. */
+  @Override
+  public long weight() {
+    return current.v;
+  }
+  /** Returns the next term copied into a reused {@link BytesRef}, or null once exhausted. */
+  @Override
+  public BytesRef next() {
+    if (i.hasNext() || (first && current!=null)) {
+      if (first) {
+        first = false;
+      } else {
+        current = i.next();
+      }
+      spare.copyBytes(current.term);
+      return spare;
+    }
+    return null;
+  }
+  /** Payload of the current entry (may be null); throws NullPointerException if the source was empty. */
+  @Override
+  public BytesRef payload() {
+    return current.payload;
+  }
+  /** Taken from the first entry only; false when the source was empty. */
+  @Override
+  public boolean hasPayloads() {
+    return hasPayloads;
+  }
+}
\ No newline at end of file
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
index f57d5d3..7858a23 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/LookupBenchmarkTest.java
@@ -72,12 +72,12 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Input term/weight pairs.
*/
- private static TermFreqPayload [] dictionaryInput;
+ private static Input [] dictionaryInput;
/**
* Benchmark term/weight pairs (randomized order).
*/
- private static List benchmarkInput;
+ private static List benchmarkInput;
/**
* Loads terms and frequencies from Wikipedia (cached).
@@ -85,9 +85,9 @@ public class LookupBenchmarkTest extends LuceneTestCase {
@BeforeClass
public static void setup() throws Exception {
assert false : "disable assertions before running benchmarks!";
- List input = readTop50KWiki();
+ List input = readTop50KWiki();
Collections.shuffle(input, random);
- LookupBenchmarkTest.dictionaryInput = input.toArray(new TermFreqPayload [input.size()]);
+ LookupBenchmarkTest.dictionaryInput = input.toArray(new Input [input.size()]);
Collections.shuffle(input, random);
LookupBenchmarkTest.benchmarkInput = input;
}
@@ -97,8 +97,8 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Collect the multilingual input for benchmarks/ tests.
*/
- public static List readTop50KWiki() throws Exception {
- List input = new ArrayList();
+ public static List readTop50KWiki() throws Exception {
+ List input = new ArrayList();
URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
assert resource != null : "Resource missing: Top50KWiki.utf8";
@@ -109,7 +109,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
assertTrue("No | separator?: " + line, tab >= 0);
int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
- input.add(new TermFreqPayload(key, weight));
+ input.add(new Input(key, weight));
}
br.close();
return input;
@@ -163,7 +163,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
/**
* Create {@link Lookup} instance and populate it.
*/
- private Lookup buildLookup(Class extends Lookup> cls, TermFreqPayload[] input) throws Exception {
+ private Lookup buildLookup(Class extends Lookup> cls, Input[] input) throws Exception {
Lookup lookup = null;
try {
lookup = cls.newInstance();
@@ -176,7 +176,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
lookup = ctor.newInstance(a);
}
}
- lookup.build(new TermFreqPayloadArrayIterator(input));
+ lookup.build(new InputArrayIterator(input));
return lookup;
}
@@ -220,7 +220,7 @@ public class LookupBenchmarkTest extends LuceneTestCase {
final Lookup lookup = buildLookup(cls, dictionaryInput);
final List input = new ArrayList(benchmarkInput.size());
- for (TermFreqPayload tf : benchmarkInput) {
+ for (Input tf : benchmarkInput) {
String s = tf.term.utf8ToString();
String sub = s.substring(0, Math.min(s.length(),
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
index 2439857..f98f90f 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/PersistenceTest.java
@@ -65,10 +65,10 @@ public class PersistenceTest extends LuceneTestCase {
// Add all input keys.
Lookup lookup = lookupClass.newInstance();
- TermFreqPayload[] keys = new TermFreqPayload[this.keys.length];
+ Input[] keys = new Input[this.keys.length];
for (int i = 0; i < keys.length; i++)
- keys[i] = new TermFreqPayload(this.keys[i], i);
- lookup.build(new TermFreqPayloadArrayIterator(keys));
+ keys[i] = new Input(this.keys[i], i);
+ lookup.build(new InputArrayIterator(keys));
// Store the suggester.
File storeDir = TEMP_DIR;
@@ -81,7 +81,7 @@ public class PersistenceTest extends LuceneTestCase {
// Assert validity.
Random random = random();
long previous = Long.MIN_VALUE;
- for (TermFreqPayload k : keys) {
+ for (Input k : keys) {
List list = lookup.lookup(_TestUtil.bytesToCharSequence(k.term, random), false, 1);
assertEquals(1, list.size());
LookupResult lookupResult = list.get(0);
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java
deleted file mode 100644
index 5463a13..0000000
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.BytesRef;
-
-public final class TermFreqPayload {
- public final BytesRef term;
- public final long v;
- public final BytesRef payload;
- public final boolean hasPayloads;
-
- public TermFreqPayload(BytesRef term, long v, BytesRef payload) {
- this(term, v, payload, true);
- }
-
- public TermFreqPayload(String term, long v, BytesRef payload) {
- this(new BytesRef(term), v, payload, true);
- }
-
- public TermFreqPayload(BytesRef term, long v) {
- this(term, v, null, false);
- }
-
- public TermFreqPayload(String term, long v) {
- this(new BytesRef(term), v, null, false);
- }
-
- public TermFreqPayload(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
- this.term = term;
- this.v = v;
- this.payload = payload;
- this.hasPayloads = hasPayloads;
- }
-
- public boolean hasPayloads() {
- return hasPayloads;
- }
-}
\ No newline at end of file
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java
deleted file mode 100644
index 6583f73..0000000
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java
+++ /dev/null
@@ -1,82 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Arrays;
-import java.util.Iterator;
-
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s.
- */
-public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator {
- private final Iterator i;
- private final boolean hasPayloads;
- private boolean first;
- private TermFreqPayload current;
- private final BytesRef spare = new BytesRef();
-
- public TermFreqPayloadArrayIterator(Iterator i) {
- this.i = i;
- if (i.hasNext()) {
- current = i.next();
- first = true;
- this.hasPayloads = current.hasPayloads;
- } else {
- this.hasPayloads = false;
- }
- }
-
- public TermFreqPayloadArrayIterator(TermFreqPayload[] i) {
- this(Arrays.asList(i));
- }
- public TermFreqPayloadArrayIterator(Iterable i) {
- this(i.iterator());
- }
-
- @Override
- public long weight() {
- return current.v;
- }
-
- @Override
- public BytesRef next() {
- if (i.hasNext() || (first && current!=null)) {
- if (first) {
- first = false;
- } else {
- current = i.next();
- }
- spare.copyBytes(current.term);
- return spare;
- }
- return null;
- }
-
- @Override
- public BytesRef payload() {
- return current.payload;
- }
-
- @Override
- public boolean hasPayloads() {
- return hasPayloads;
- }
-}
\ No newline at end of file
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java
new file mode 100644
index 0000000..b0c423d
--- /dev/null
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java
@@ -0,0 +1,123 @@
+package org.apache.lucene.search.suggest;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+import java.util.AbstractMap.SimpleEntry;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestInputIterator extends LuceneTestCase {
+ /** Both the sorted and the unsorted wrapper must return null from next() when built over an empty Input array. */
+ public void testEmpty() throws Exception {
+ InputArrayIterator iterator = new InputArrayIterator(new Input[0]);
+ InputIterator wrapper = new SortedInputIterator(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
+ assertNull(wrapper.next());
+ wrapper = new UnsortedInputIterator(iterator);
+ assertNull(wrapper.next());
+ }
+ /** Runs random unique keys through the sorted and unsorted wrappers, with and without payloads, checking against TreeMap expectations. */
+ public void testTerms() throws Exception {
+ Random random = random();
+ int num = atLeast(10000);
+
+ Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
+ TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator);
+ TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator);
+ Input[] unsorted = new Input[num];
+ Input[] unsortedWithoutPayload = new Input[num];
+
+ for (int i = 0; i < num; i++) {
+ BytesRef key;
+ BytesRef payload;
+ do {
+ key = new BytesRef(_TestUtil.randomUnicodeString(random));
+ payload = new BytesRef(_TestUtil.randomUnicodeString(random));
+ } while (sorted.containsKey(key));
+ long value = random.nextLong();
+ sortedWithoutPayload.put(key, value);
+ sorted.put(key, new SimpleEntry<>(value, payload));
+ unsorted[i] = new Input(key, value, payload);
+ unsortedWithoutPayload[i] = new Input(key, value);
+ }
+
+ // test the sorted iterator wrapper with payloads
+ InputIterator wrapper = new SortedInputIterator(new InputArrayIterator(unsorted), comparator);
+ Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
+ while (expected.hasNext()) {
+ Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>> entry = expected.next();
+
+ assertEquals(entry.getKey(), wrapper.next());
+ assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
+ assertEquals(entry.getValue().getValue(), wrapper.payload());
+ }
+ assertNull(wrapper.next());
+
+ // test the unsorted iterator wrapper with payloads
+ wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
+ TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
+ BytesRef key;
+ while ((key = wrapper.next()) != null) {
+ long value = wrapper.weight();
+ BytesRef payload = wrapper.payload();
+ actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload)));
+ }
+ assertEquals(sorted, actual);
+
+ // test the sorted iterator wrapper without payloads
+ InputIterator wrapperWithoutPayload = new SortedInputIterator(new InputArrayIterator(unsortedWithoutPayload), comparator);
+ Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
+ while (expectedWithoutPayload.hasNext()) {
+ Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
+
+ assertEquals(entry.getKey(), wrapperWithoutPayload.next());
+ assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
+ assertNull(wrapperWithoutPayload.payload());
+ }
+ assertNull(wrapperWithoutPayload.next());
+
+ // test the unsorted iterator wrapper without payloads
+ wrapperWithoutPayload = new UnsortedInputIterator(new InputArrayIterator(unsortedWithoutPayload));
+ TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
+ while ((key = wrapperWithoutPayload.next()) != null) {
+ long value = wrapperWithoutPayload.weight();
+ assertNull(wrapperWithoutPayload.payload());
+ actualWithoutPayload.put(BytesRef.deepCopyOf(key), value);
+ }
+ assertEquals(sortedWithoutPayload, actualWithoutPayload);
+ }
+ /** Decodes the 8 bytes of b.bytes starting at b.offset as a big-endian long (high int first, low int masked to 32 bits). */
+ public static long asLong(BytesRef b) {
+ return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
+ b.offset + 4) & 0xFFFFFFFFL);
+ }
+ /** Decodes the 4 bytes of b.bytes starting at index pos as a big-endian int; pos indexes b.bytes directly, b.offset is not added. */
+ private static int asIntInternal(BytesRef b, int pos) {
+ return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
+ | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
+ }
+}
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java
deleted file mode 100644
index e7d8257..0000000
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/TestTermFreqPayloadIterator.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package org.apache.lucene.search.suggest;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with this
- * work for additional information regarding copyright ownership. The ASF
- * licenses this file to You under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations under
- * the License.
- */
-
-import java.util.AbstractMap.SimpleEntry;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Random;
-import java.util.TreeMap;
-
-import org.apache.lucene.search.spell.TermFreqPayloadIterator;
-import org.apache.lucene.store.ByteArrayDataOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-
-public class TestTermFreqPayloadIterator extends LuceneTestCase {
-
- public void testEmpty() throws Exception {
- TermFreqPayloadArrayIterator iterator = new TermFreqPayloadArrayIterator(new TermFreqPayload[0]);
- TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(iterator, BytesRef.getUTF8SortedAsUnicodeComparator());
- assertNull(wrapper.next());
- wrapper = new UnsortedTermFreqPayloadIteratorWrapper(iterator);
- assertNull(wrapper.next());
- }
-
- public void testTerms() throws Exception {
- Random random = random();
- int num = atLeast(10000);
-
- Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
- TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator);
- TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator);
- TermFreqPayload[] unsorted = new TermFreqPayload[num];
- TermFreqPayload[] unsortedWithoutPayload = new TermFreqPayload[num];
-
- for (int i = 0; i < num; i++) {
- BytesRef key;
- BytesRef payload;
- do {
- key = new BytesRef(_TestUtil.randomUnicodeString(random));
- payload = new BytesRef(_TestUtil.randomUnicodeString(random));
- } while (sorted.containsKey(key));
- long value = random.nextLong();
- sortedWithoutPayload.put(key, value);
- sorted.put(key, new SimpleEntry<>(value, payload));
- unsorted[i] = new TermFreqPayload(key, value, payload);
- unsortedWithoutPayload[i] = new TermFreqPayload(key, value);
- }
-
- // test the sorted iterator wrapper with payloads
- TermFreqPayloadIterator wrapper = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted), comparator);
- Iterator<Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>>> expected = sorted.entrySet().iterator();
- while (expected.hasNext()) {
- Map.Entry<BytesRef, SimpleEntry<Long, BytesRef>> entry = expected.next();
-
- assertEquals(entry.getKey(), wrapper.next());
- assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
- assertEquals(entry.getValue().getValue(), wrapper.payload());
- }
- assertNull(wrapper.next());
-
- // test the unsorted iterator wrapper with payloads
- wrapper = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsorted));
- TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();
- BytesRef key;
- while ((key = wrapper.next()) != null) {
- long value = wrapper.weight();
- BytesRef payload = wrapper.payload();
- actual.put(BytesRef.deepCopyOf(key), new SimpleEntry<>(value, BytesRef.deepCopyOf(payload)));
- }
- assertEquals(sorted, actual);
-
- // test the sorted iterator wrapper without payloads
- TermFreqPayloadIterator wrapperWithoutPayload = new SortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload), comparator);
- Iterator<Map.Entry<BytesRef, Long>> expectedWithoutPayload = sortedWithoutPayload.entrySet().iterator();
- while (expectedWithoutPayload.hasNext()) {
- Map.Entry<BytesRef, Long> entry = expectedWithoutPayload.next();
-
- assertEquals(entry.getKey(), wrapperWithoutPayload.next());
- assertEquals(entry.getValue().longValue(), wrapperWithoutPayload.weight());
- assertNull(wrapperWithoutPayload.payload());
- }
- assertNull(wrapperWithoutPayload.next());
-
- // test the unsorted iterator wrapper without payloads
- wrapperWithoutPayload = new UnsortedTermFreqPayloadIteratorWrapper(new TermFreqPayloadArrayIterator(unsortedWithoutPayload));
- TreeMap<BytesRef, Long> actualWithoutPayload = new TreeMap<>();
- while ((key = wrapperWithoutPayload.next()) != null) {
- long value = wrapperWithoutPayload.weight();
- assertNull(wrapperWithoutPayload.payload());
- actualWithoutPayload.put(BytesRef.deepCopyOf(key), value);
- }
- assertEquals(sortedWithoutPayload, actualWithoutPayload);
- }
-
- public static long asLong(BytesRef b) {
- return (((long) asIntInternal(b, b.offset) << 32) | asIntInternal(b,
- b.offset + 4) & 0xFFFFFFFFL);
- }
-
- private static int asIntInternal(BytesRef b, int pos) {
- return ((b.bytes[pos++] & 0xFF) << 24) | ((b.bytes[pos++] & 0xFF) << 16)
- | ((b.bytes[pos++] & 0xFF) << 8) | (b.bytes[pos] & 0xFF);
- }
-}
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index 84c6227..8113721 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -34,8 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreqPayload;
-import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -47,9 +47,9 @@ import org.apache.lucene.util._TestUtil;
public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
public void testBasic() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("lend me your ear", 8, new BytesRef("foobar")),
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -61,7 +61,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newDirectory();
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(2, results.size());
@@ -95,9 +95,9 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testAfterLoad() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("lend me your ear", 8, new BytesRef("foobar")),
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -109,7 +109,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newFSDirectory(path);
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
suggester.close();
suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) {
@@ -150,8 +150,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
@SuppressWarnings("unchecked")
public void testHighlightAsObject() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -204,7 +204,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true);
assertEquals(1, results.size());
@@ -230,9 +230,9 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testRandomMinPrefixLength() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")),
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("lend me your ear", 8, new BytesRef("foobar")),
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -245,7 +245,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newFSDirectory(path);
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
for(int i=0;i<2;i++) {
for(int j=0;j<2;j++) {
@@ -312,8 +312,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testHighlight() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -325,7 +325,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newDirectory();
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a penny saved is a penny earned", results.get(0).key);
@@ -333,8 +333,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testHighlightCaseChange() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -346,7 +346,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newDirectory();
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a Penny saved is a penny earned", results.get(0).key);
@@ -367,7 +367,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newDirectory();
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a Penny saved is a penny earned", results.get(0).key);
@@ -375,8 +375,8 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
public void testDoubleClose() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")),
};
File tempDir = _TestUtil.getTempDir("AnalyzingInfixSuggesterTest");
@@ -388,7 +388,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
return newDirectory();
}
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
suggester.close();
suggester.close();
}
@@ -422,11 +422,11 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
}
};
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("a bob for apples", 10, new BytesRef("foobaz")),
+ Input keys[] = new Input[] {
+ new Input("a bob for apples", 10, new BytesRef("foobaz")),
};
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("a", random()), 10, true, true);
assertEquals(1, results.size());
assertEquals("a bob for apples", results.get(0).key);
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
index f367f36..eca1d26 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
@@ -52,8 +52,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreqPayload;
-import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
@@ -63,18 +63,18 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
- Iterable keys = shuffle(
- new TermFreqPayload("foo", 50),
- new TermFreqPayload("bar", 10),
- new TermFreqPayload("barbar", 10),
- new TermFreqPayload("barbar", 12),
- new TermFreqPayload("barbara", 6),
- new TermFreqPayload("bar", 5),
- new TermFreqPayload("barbara", 1)
+ Iterable keys = shuffle(
+ new Input("foo", 50),
+ new Input("bar", 10),
+ new Input("barbar", 10),
+ new Input("barbar", 12),
+ new Input("barbara", 6),
+ new Input("bar", 5),
+ new Input("barbara", 1)
);
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
@@ -109,16 +109,16 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
public void testKeywordWithPayloads() throws Exception {
- Iterable keys = shuffle(
- new TermFreqPayload("foo", 50, new BytesRef("hello")),
- new TermFreqPayload("bar", 10, new BytesRef("goodbye")),
- new TermFreqPayload("barbar", 12, new BytesRef("thank you")),
- new TermFreqPayload("bar", 9, new BytesRef("should be deduplicated")),
- new TermFreqPayload("bar", 8, new BytesRef("should also be deduplicated")),
- new TermFreqPayload("barbara", 6, new BytesRef("for all the fish")));
+ Iterable keys = shuffle(
+ new Input("foo", 50, new BytesRef("hello")),
+ new Input("bar", 10, new BytesRef("goodbye")),
+ new Input("barbar", 12, new BytesRef("thank you")),
+ new Input("bar", 9, new BytesRef("should be deduplicated")),
+ new Input("bar", 8, new BytesRef("should also be deduplicated")),
+ new Input("barbara", 6, new BytesRef("for all the fish")));
AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
for (int i = 0; i < 2; i++) {
// top N of 2, but only foo is available
List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2);
@@ -163,14 +163,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testRandomRealisticKeys() throws IOException {
LineFileDocs lineFile = new LineFileDocs(random());
Map mapping = new HashMap<>();
- List keys = new ArrayList<>();
+ List keys = new ArrayList<>();
int howMany = atLeast(100); // this might bring up duplicates
for (int i = 0; i < howMany; i++) {
Document nextDoc = lineFile.nextDoc();
String title = nextDoc.getField("title").stringValue();
int randomWeight = random().nextInt(100);
- keys.add(new TermFreqPayload(title, randomWeight));
+ keys.add(new Input(title, randomWeight));
if (!mapping.containsKey(title) || mapping.get(title) < randomWeight) {
mapping.put(title, Long.valueOf(randomWeight));
}
@@ -180,16 +180,16 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
analyzingSuggester.setPreservePositionIncrements(random().nextBoolean());
boolean doPayloads = random().nextBoolean();
if (doPayloads) {
- List keysAndPayloads = new ArrayList<>();
- for (TermFreqPayload termFreq : keys) {
- keysAndPayloads.add(new TermFreqPayload(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
+ List keysAndPayloads = new ArrayList<>();
+ for (Input termFreq : keys) {
+ keysAndPayloads.add(new Input(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
}
- analyzingSuggester.build(new TermFreqPayloadArrayIterator(keysAndPayloads));
+ analyzingSuggester.build(new InputArrayIterator(keysAndPayloads));
} else {
- analyzingSuggester.build(new TermFreqPayloadArrayIterator(keys));
+ analyzingSuggester.build(new InputArrayIterator(keys));
}
- for (TermFreqPayload termFreq : keys) {
+ for (Input termFreq : keys) {
List lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
for (LookupResult lookupResult : lookup) {
assertEquals(mapping.get(lookupResult.key), Long.valueOf(lookupResult.value));
@@ -209,14 +209,14 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("the ghost of christmas past", 50),
+ Input keys[] = new Input[] {
+ new Input("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.setPreservePositionIncrements(false);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@@ -239,23 +239,23 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testEmpty() throws Exception {
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
+ suggester.build(new InputArrayIterator(new Input[0]));
List result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}
public void testNoSeps() throws Exception {
- TermFreqPayload[] keys = new TermFreqPayload[] {
- new TermFreqPayload("ab cd", 0),
- new TermFreqPayload("abcd", 1),
+ Input[] keys = new Input[] {
+ new Input("ab cd", 0),
+ new Input("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, options, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@@ -316,13 +316,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("wifi network is slow", 50),
- new TermFreqPayload("wi fi network is fast", 10),
+ Input keys[] = new Input[] {
+ new Input("wifi network is slow", 50),
+ new Input("wi fi network is fast", 10),
};
//AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1);
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
System.out.println("Results: " + results);
@@ -382,12 +382,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
};
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("ab xc", 50),
- new TermFreqPayload("ba xd", 50),
+ Input keys[] = new Input[] {
+ new Input("ab xc", 50),
+ new Input("ba xd", 50),
};
AnalyzingSuggester suggester = new AnalyzingSuggester(analyzer);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@@ -460,11 +460,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 1),
- new TermFreqPayload("x y z", 3),
- new TermFreqPayload("x", 2),
- new TermFreqPayload("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@@ -500,11 +500,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 1),
- new TermFreqPayload("x y z", 3),
- new TermFreqPayload("x", 2),
- new TermFreqPayload("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@@ -655,12 +655,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
boolean doPayloads = random().nextBoolean();
- TermFreqPayload[] keys = null;
- TermFreqPayload[] payloadKeys = null;
+ Input[] keys = null;
+ Input[] payloadKeys = null;
if (doPayloads) {
- payloadKeys = new TermFreqPayload[numQueries];
+ payloadKeys = new Input[numQueries];
} else {
- keys = new TermFreqPayload[numQueries];
+ keys = new Input[numQueries];
}
boolean preserveSep = random().nextBoolean();
@@ -731,9 +731,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
byte[] bytes = new byte[random().nextInt(10)];
random().nextBytes(bytes);
payload = new BytesRef(bytes);
- payloadKeys[i] = new TermFreqPayload(key, weight, payload);
+ payloadKeys[i] = new Input(key, weight, payload);
} else {
- keys[i] = new TermFreqPayload(key, weight);
+ keys[i] = new Input(key, weight);
payload = null;
}
@@ -754,9 +754,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1);
if (doPayloads) {
- suggester.build(new TermFreqPayloadArrayIterator(shuffle(payloadKeys)));
+ suggester.build(new InputArrayIterator(shuffle(payloadKeys)));
} else {
- suggester.build(new TermFreqPayloadArrayIterator(shuffle(keys)));
+ suggester.build(new InputArrayIterator(shuffle(keys)));
}
for (String prefix : allPrefixes) {
@@ -874,8 +874,8 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testMaxSurfaceFormsPerAnalyzedForm() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 2, -1);
- suggester.build(new TermFreqPayloadArrayIterator(shuffle(new TermFreqPayload("a", 40),
- new TermFreqPayload("a ", 50), new TermFreqPayload(" a", 60))));
+ suggester.build(new InputArrayIterator(shuffle(new Input("a", 40),
+ new Input("a ", 50), new Input(" a", 60))));
List results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@@ -889,11 +889,11 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("a", 2),
- new TermFreqPayload("a b c", 3),
- new TermFreqPayload("a c a", 1),
- new TermFreqPayload("a c b", 1),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("a", 2),
+ new Input("a b c", 3),
+ new Input("a c a", 1),
+ new Input("a c b", 1),
}));
suggester.lookup("a", false, 4);
@@ -905,10 +905,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("a", 5),
- new TermFreqPayload("a b", 3),
- new TermFreqPayload("a c", 4),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("a", 5),
+ new Input("a b", 3),
+ new Input("a c", 4),
}));
List results = suggester.lookup("a", false, 3);
@@ -970,9 +970,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(shuffle(
- new TermFreqPayload("hambone", 6),
- new TermFreqPayload("nellie", 5))));
+ suggester.build(new InputArrayIterator(shuffle(
+ new Input("hambone", 6),
+ new Input("nellie", 5))));
List results = suggester.lookup("nellie", false, 2);
assertEquals(2, results.size());
@@ -1039,9 +1039,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("a", 6),
- new TermFreqPayload("b", 5),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("a", 6),
+ new Input("b", 5),
}));
List results = suggester.lookup("a", false, 2);
@@ -1112,21 +1112,21 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("a a", 50),
- new TermFreqPayload("a b", 50),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("a a", 50),
+ new Input("a b", 50),
}));
}
public void testDupSurfaceFormsMissingResults3() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("a a", 7),
- new TermFreqPayload("a a", 7),
- new TermFreqPayload("a c", 6),
- new TermFreqPayload("a c", 3),
- new TermFreqPayload("a b", 5),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("a a", 7),
+ new Input("a a", 7),
+ new Input("a c", 6),
+ new Input("a c", 3),
+ new Input("a b", 5),
}));
assertEquals("[a a/7, a c/6, a b/5]", suggester.lookup("a", false, 3).toString());
}
@@ -1134,9 +1134,9 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testEndingSpace() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("i love lucy", 7),
- new TermFreqPayload("isla de muerta", 8),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("i love lucy", 7),
+ new Input("isla de muerta", 8),
}));
assertEquals("[isla de muerta/8, i love lucy/7]", suggester.lookup("i", false, 3).toString());
assertEquals("[i love lucy/7]", suggester.lookup("i ", false, 3).toString());
@@ -1167,15 +1167,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
};
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, 1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {new TermFreqPayload("a", 1)}));
+ suggester.build(new InputArrayIterator(new Input[] {new Input("a", 1)}));
assertEquals("[a/1]", suggester.lookup("a", false, 1).toString());
}
public void testIllegalLookupArgument() throws Exception {
Analyzer a = new MockAnalyzer(random());
AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("а где Люси?", 7),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("а где Люси?", 7),
}));
try {
suggester.lookup("а\u001E", false, 3);
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
index e65f2bc..3d24856 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
@@ -41,8 +41,8 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreqPayload;
-import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -54,16 +54,16 @@ import org.apache.lucene.util.fst.Util;
public class FuzzySuggesterTest extends LuceneTestCase {
public void testRandomEdits() throws IOException {
- List keys = new ArrayList();
+ List keys = new ArrayList();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
- keys.add(new TermFreqPayload("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+ keys.add(new Input("boo" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
- keys.add(new TermFreqPayload("foo bar boo far", 12));
+ keys.add(new Input("foo bar boo far", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, FuzzySuggester.DEFAULT_UNICODE_AWARE);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("foo bar boo", FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX);
@@ -75,16 +75,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
public void testNonLatinRandomEdits() throws IOException {
- List keys = new ArrayList();
+ List keys = new ArrayList();
int numTerms = atLeast(100);
for (int i = 0; i < numTerms; i++) {
- keys.add(new TermFreqPayload("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
+ keys.add(new Input("буу" + _TestUtil.randomSimpleString(random()), 1 + random().nextInt(100)));
}
- keys.add(new TermFreqPayload("фуу бар буу фар", 12));
+ keys.add(new Input("фуу бар буу фар", 12));
MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
FuzzySuggester suggester = new FuzzySuggester(analyzer, analyzer, FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP, 256, -1, FuzzySuggester.DEFAULT_MAX_EDITS, FuzzySuggester.DEFAULT_TRANSPOSITIONS,
0, FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH, true);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
String addRandomEdit = addRandomEdit("фуу бар буу", 0);
@@ -97,15 +97,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
/** this is basically the WFST test ported to KeywordAnalyzer. so it acts the same */
public void testKeyword() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("foo", 50),
- new TermFreqPayload("bar", 10),
- new TermFreqPayload("barbar", 12),
- new TermFreqPayload("barbara", 6)
+ Input keys[] = new Input[] {
+ new Input("foo", 50),
+ new Input("bar", 10),
+ new Input("barbar", 12),
+ new Input("barbara", 6)
};
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("bariar", random()), false, 2);
assertEquals(2, results.size());
@@ -172,14 +172,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
* basic "standardanalyzer" test with stopword removal
*/
public void testStandard() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("the ghost of christmas past", 50),
+ Input keys[] = new Input[] {
+ new Input("the ghost of christmas past", 50),
};
Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard);
suggester.setPreservePositionIncrements(false);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
assertEquals(1, results.size());
@@ -200,16 +200,16 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
public void testNoSeps() throws Exception {
- TermFreqPayload[] keys = new TermFreqPayload[] {
- new TermFreqPayload("ab cd", 0),
- new TermFreqPayload("abcd", 1),
+ Input[] keys = new Input[] {
+ new Input("ab cd", 0),
+ new Input("abcd", 1),
};
int options = 0;
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, options, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// TODO: would be nice if "ab " would allow the test to
// pass, and more generally if the analyzer can know
// that the user's current query has ended at a word,
@@ -270,12 +270,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
};
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("wifi network is slow", 50),
- new TermFreqPayload("wi fi network is fast", 10),
+ Input keys[] = new Input[] {
+ new Input("wifi network is slow", 50),
+ new Input("wi fi network is fast", 10),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup("wifi network", false, 10);
if (VERBOSE) {
@@ -290,7 +290,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testEmpty() throws Exception {
FuzzySuggester suggester = new FuzzySuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
+ suggester.build(new InputArrayIterator(new Input[0]));
List result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
@@ -344,12 +344,12 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
};
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("ab xc", 50),
- new TermFreqPayload("ba xd", 50),
+ Input keys[] = new Input[] {
+ new Input("ab xc", 50),
+ new Input("ba xd", 50),
};
FuzzySuggester suggester = new FuzzySuggester(analyzer);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
}
@@ -418,11 +418,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 1),
- new TermFreqPayload("x y z", 3),
- new TermFreqPayload("x", 2),
- new TermFreqPayload("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
//System.out.println("ALL: " + suggester.lookup("x y", false, 6));
@@ -458,11 +458,11 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = getUnusualAnalyzer();
FuzzySuggester suggester = new FuzzySuggester(a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, 1, true, 1, 3, false);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 1),
- new TermFreqPayload("x y z", 3),
- new TermFreqPayload("x", 2),
- new TermFreqPayload("z z z", 20),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 1),
+ new Input("x y z", 3),
+ new Input("x", 2),
+ new Input("z z z", 20),
}));
for(int topN=1;topN<6;topN++) {
@@ -600,7 +600,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
final TreeSet allPrefixes = new TreeSet();
final Set seen = new HashSet();
- TermFreqPayload[] keys = new TermFreqPayload[numQueries];
+ Input[] keys = new Input[numQueries];
boolean preserveSep = random().nextBoolean();
boolean unicodeAware = random().nextBoolean();
@@ -666,7 +666,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
- keys[i] = new TermFreqPayload(key, weight);
+ keys[i] = new Input(key, weight);
slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
}
@@ -684,7 +684,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
FuzzySuggester suggester = new FuzzySuggester(a, a,
preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, 1, false, 1, 3, unicodeAware);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
for (String prefix : allPrefixes) {
@@ -825,14 +825,14 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, 0, 2, -1, 1, true, 1, 3, false);
- List keys = Arrays.asList(new TermFreqPayload[] {
- new TermFreqPayload("a", 40),
- new TermFreqPayload("a ", 50),
- new TermFreqPayload(" a", 60),
+ List keys = Arrays.asList(new Input[] {
+ new Input("a", 40),
+ new Input("a ", 50),
+ new Input(" a", 60),
});
Collections.shuffle(keys, random());
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
List results = suggester.lookup("a", false, 5);
assertEquals(2, results.size());
@@ -846,15 +846,15 @@ public class FuzzySuggesterTest extends LuceneTestCase {
Analyzer a = new MockAnalyzer(random());
FuzzySuggester suggester = new FuzzySuggester(a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, 2, true, 1, 3, false);
- List keys = Arrays.asList(new TermFreqPayload[] {
- new TermFreqPayload("foo bar", 40),
- new TermFreqPayload("foo bar baz", 50),
- new TermFreqPayload("barbaz", 60),
- new TermFreqPayload("barbazfoo", 10),
+ List keys = Arrays.asList(new Input[] {
+ new Input("foo bar", 40),
+ new Input("foo bar baz", 50),
+ new Input("barbaz", 60),
+ new Input("barbazfoo", 10),
});
Collections.shuffle(keys, random());
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
@@ -929,25 +929,25 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testRandom2() throws Throwable {
final int NUM = atLeast(200);
- final List answers = new ArrayList();
+ final List answers = new ArrayList();
final Set seen = new HashSet();
for(int i=0;i() {
+ Collections.sort(answers, new Comparator() {
@Override
- public int compare(TermFreqPayload a, TermFreqPayload b) {
+ public int compare(Input a, Input b) {
return a.term.compareTo(b.term);
}
});
if (VERBOSE) {
System.out.println("\nTEST: targets");
- for(TermFreqPayload tf : answers) {
+ for(Input tf : answers) {
System.out.println(" " + tf.term.utf8ToString() + " freq=" + tf.v);
}
}
@@ -965,7 +965,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
}
Collections.shuffle(answers, random());
- suggest.build(new TermFreqPayloadArrayIterator(answers.toArray(new TermFreqPayload[answers.size()])));
+ suggest.build(new InputArrayIterator(answers.toArray(new Input[answers.size()])));
final int ITERS = atLeast(100);
for(int iter=0;iter slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) {
+ private List slowFuzzyMatch(int prefixLen, int maxEdits, boolean allowTransposition, List answers, String frag) {
final List results = new ArrayList();
final int fragLen = frag.length();
- for(TermFreqPayload tf : answers) {
+ for(Input tf : answers) {
//System.out.println(" check s=" + tf.term.utf8ToString());
boolean prefixMatches = true;
for(int i=0;i keys = shuffle(
- new TermFreqPayload("foo bar baz blah", 50),
- new TermFreqPayload("boo foo bar foo bee", 20)
+ Iterable keys = shuffle(
+ new Input("foo bar baz blah", 50),
+ new Input("boo foo bar foo bee", 20)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
for(int i=0;i<2;i++) {
@@ -101,12 +101,12 @@ public class TestFreeTextSuggester extends LuceneTestCase {
public void testIllegalByteDuringBuild() throws Exception {
// Default separator is INFORMATION SEPARATOR TWO
// (0x1e), so no input token is allowed to contain it
- Iterable keys = shuffle(
- new TermFreqPayload("foo\u001ebar baz", 50)
+ Iterable keys = shuffle(
+ new Input("foo\u001ebar baz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
try {
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
@@ -116,11 +116,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
public void testIllegalByteDuringQuery() throws Exception {
// Default separator is INFORMATION SEPARATOR TWO
// (0x1e), so no input token is allowed to contain it
- Iterable keys = shuffle(
- new TermFreqPayload("foo bar baz", 50)
+ Iterable keys = shuffle(
+ new Input("foo bar baz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
try {
sug.lookup("foo\u001eb", 10);
@@ -136,7 +136,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Skip header:
lfd.nextDoc();
FreeTextSuggester sug = new FreeTextSuggester(new MockAnalyzer(random()));
- sug.build(new TermFreqPayloadIterator() {
+ sug.build(new InputIterator() {
private int count;
@@ -185,13 +185,13 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Make sure you can suggest based only on unigram model:
public void testUnigrams() throws Exception {
- Iterable keys = shuffle(
- new TermFreqPayload("foo bar baz blah boo foo bar foo bee", 50)
+ Iterable keys = shuffle(
+ new Input("foo bar baz blah boo foo bar foo bee", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 1, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
// Sorts first by count, descending, second by term, ascending
assertEquals("bar/0.22 baz/0.11 bee/0.11 blah/0.11 boo/0.11",
toString(sug.lookup("b", 10)));
@@ -199,24 +199,24 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Make sure the last token is not duplicated
public void testNoDupsAcrossGrams() throws Exception {
- Iterable keys = shuffle(
- new TermFreqPayload("foo bar bar bar bar", 50)
+ Iterable keys = shuffle(
+ new Input("foo bar bar bar bar", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
assertEquals("foo bar/1.00",
toString(sug.lookup("foo b", 10)));
}
// Lookup of just empty string produces unicode only matches:
public void testEmptyString() throws Exception {
- Iterable keys = shuffle(
- new TermFreqPayload("foo bar bar bar bar", 50)
+ Iterable keys = shuffle(
+ new Input("foo bar bar bar bar", 50)
);
Analyzer a = new MockAnalyzer(random());
FreeTextSuggester sug = new FreeTextSuggester(a, a, 2, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
try {
sug.lookup("", 10);
fail("did not hit exception");
@@ -238,11 +238,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
}
};
- Iterable keys = shuffle(
- new TermFreqPayload("wizard of oz", 50)
+ Iterable keys = shuffle(
+ new Input("wizard of oz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
assertEquals("wizard _ oz/1.00",
toString(sug.lookup("wizard of", 10)));
@@ -266,11 +266,11 @@ public class TestFreeTextSuggester extends LuceneTestCase {
}
};
- Iterable keys = shuffle(
- new TermFreqPayload("wizard of of oz", 50)
+ Iterable keys = shuffle(
+ new Input("wizard of of oz", 50)
);
FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
- sug.build(new TermFreqPayloadArrayIterator(keys));
+ sug.build(new InputArrayIterator(keys));
assertEquals("",
toString(sug.lookup("wizard of of", 10)));
}
@@ -330,7 +330,7 @@ public class TestFreeTextSuggester extends LuceneTestCase {
// Build suggester model:
FreeTextSuggester sug = new FreeTextSuggester(a, a, grams, (byte) 0x20);
- sug.build(new TermFreqPayloadIterator() {
+ sug.build(new InputIterator() {
int upto;
@Override
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
index 42594e7..de12673 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/FSTCompletionTest.java
@@ -28,8 +28,8 @@ import org.apache.lucene.util.*;
* Unit tests for {@link FSTCompletion}.
*/
public class FSTCompletionTest extends LuceneTestCase {
- public static TermFreqPayload tf(String t, int v) {
- return new TermFreqPayload(t, v);
+ public static Input tf(String t, int v) {
+ return new Input(t, v);
}
private FSTCompletion completion;
@@ -40,15 +40,15 @@ public class FSTCompletionTest extends LuceneTestCase {
super.setUp();
FSTCompletionBuilder builder = new FSTCompletionBuilder();
- for (TermFreqPayload tf : evalKeys()) {
+ for (Input tf : evalKeys()) {
builder.add(tf.term, (int) tf.v);
}
completion = builder.build();
completionAlphabetical = new FSTCompletion(completion.getFST(), false, true);
}
- private TermFreqPayload[] evalKeys() {
- final TermFreqPayload[] keys = new TermFreqPayload[] {
+ private Input[] evalKeys() {
+ final Input[] keys = new Input[] {
tf("one", 0),
tf("oneness", 1),
tf("onerous", 1),
@@ -157,17 +157,17 @@ public class FSTCompletionTest extends LuceneTestCase {
FSTCompletionLookup lookup = new FSTCompletionLookup(10, true);
Random r = random();
- List keys = new ArrayList();
+ List keys = new ArrayList();
for (int i = 0; i < 5000; i++) {
- keys.add(new TermFreqPayload(_TestUtil.randomSimpleString(r), -1));
+ keys.add(new Input(_TestUtil.randomSimpleString(r), -1));
}
- lookup.build(new TermFreqPayloadArrayIterator(keys));
+ lookup.build(new InputArrayIterator(keys));
// All the weights were constant, so all returned buckets must be constant, whatever they
// are.
Long previous = null;
- for (TermFreqPayload tf : keys) {
+ for (Input tf : keys) {
Long current = ((Number)lookup.get(_TestUtil.bytesToCharSequence(tf.term, random()))).longValue();
if (previous != null) {
assertEquals(previous, current);
@@ -177,11 +177,11 @@ public class FSTCompletionTest extends LuceneTestCase {
}
public void testMultilingualInput() throws Exception {
- List input = LookupBenchmarkTest.readTop50KWiki();
+ List input = LookupBenchmarkTest.readTop50KWiki();
FSTCompletionLookup lookup = new FSTCompletionLookup();
- lookup.build(new TermFreqPayloadArrayIterator(input));
- for (TermFreqPayload tf : input) {
+ lookup.build(new InputArrayIterator(input));
+ for (Input tf : input) {
assertNotNull("Not found: " + tf.term.toString(), lookup.get(_TestUtil.bytesToCharSequence(tf.term, random())));
assertEquals(tf.term.utf8ToString(), lookup.lookup(_TestUtil.bytesToCharSequence(tf.term, random()), true, 1).get(0).key.toString());
}
@@ -198,17 +198,17 @@ public class FSTCompletionTest extends LuceneTestCase {
}
public void testRandom() throws Exception {
- List freqs = new ArrayList();
+ List freqs = new ArrayList();
Random rnd = random();
for (int i = 0; i < 2500 + rnd.nextInt(2500); i++) {
int weight = rnd.nextInt(100);
- freqs.add(new TermFreqPayload("" + rnd.nextLong(), weight));
+ freqs.add(new Input("" + rnd.nextLong(), weight));
}
FSTCompletionLookup lookup = new FSTCompletionLookup();
- lookup.build(new TermFreqPayloadArrayIterator(freqs.toArray(new TermFreqPayload[freqs.size()])));
+ lookup.build(new InputArrayIterator(freqs.toArray(new Input[freqs.size()])));
- for (TermFreqPayload tf : freqs) {
+ for (Input tf : freqs) {
final String term = tf.term.utf8ToString();
for (int i = 1; i < term.length(); i++) {
String prefix = term.substring(0, i);
diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
index 2fe2c7a..56cc62b 100644
--- lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
+++ lucene/suggest/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
@@ -20,8 +20,8 @@ package org.apache.lucene.search.suggest.fst;
import java.util.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
-import org.apache.lucene.search.suggest.TermFreqPayload;
-import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator;
+import org.apache.lucene.search.suggest.Input;
+import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@@ -29,16 +29,16 @@ import org.apache.lucene.util._TestUtil;
public class WFSTCompletionTest extends LuceneTestCase {
public void testBasic() throws Exception {
- TermFreqPayload keys[] = new TermFreqPayload[] {
- new TermFreqPayload("foo", 50),
- new TermFreqPayload("bar", 10),
- new TermFreqPayload("barbar", 12),
- new TermFreqPayload("barbara", 6)
+ Input keys[] = new Input[] {
+ new Input("foo", 50),
+ new Input("bar", 10),
+ new Input("barbar", 12),
+ new Input("barbara", 6)
};
Random random = new Random(random().nextLong());
WFSTCompletionLookup suggester = new WFSTCompletionLookup();
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
// top N of 2, but only foo is available
List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
@@ -81,9 +81,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(true);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 20),
- new TermFreqPayload("x", 2),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 20),
+ new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@@ -105,9 +105,9 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload("x y", 20),
- new TermFreqPayload("x", 2),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input("x y", 20),
+ new Input("x", 2),
}));
for(int topN=1;topN<4;topN++) {
@@ -131,7 +131,7 @@ public class WFSTCompletionTest extends LuceneTestCase {
final TreeMap slowCompletor = new TreeMap();
final TreeSet allPrefixes = new TreeSet();
- TermFreqPayload[] keys = new TermFreqPayload[numWords];
+ Input[] keys = new Input[numWords];
for (int i = 0; i < numWords; i++) {
String s;
@@ -150,11 +150,11 @@ public class WFSTCompletionTest extends LuceneTestCase {
// we can probably do Integer.MAX_VALUE here, but why worry.
int weight = random().nextInt(1<<24);
slowCompletor.put(s, (long)weight);
- keys[i] = new TermFreqPayload(s, weight);
+ keys[i] = new Input(s, weight);
}
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqPayloadArrayIterator(keys));
+ suggester.build(new InputArrayIterator(keys));
Random random = new Random(random().nextLong());
for (String prefix : allPrefixes) {
@@ -205,16 +205,16 @@ public class WFSTCompletionTest extends LuceneTestCase {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[] {
- new TermFreqPayload(key1, 50),
- new TermFreqPayload(key2, 50),
+ suggester.build(new InputArrayIterator(new Input[] {
+ new Input(key1, 50),
+ new Input(key2, 50),
}));
}
public void testEmpty() throws Exception {
WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
- suggester.build(new TermFreqPayloadArrayIterator(new TermFreqPayload[0]));
+ suggester.build(new InputArrayIterator(new Input[0]));
List result = suggester.lookup("a", false, 20);
assertTrue(result.isEmpty());
}