Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java (revision 0) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java (working copy) @@ -0,0 +1,36 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.util.BytesRef; + +public final class TermFreqPayload { + public final BytesRef term; + public final long v; + public final BytesRef payload; + + public TermFreqPayload(String term, long v, BytesRef payload) { + this(new BytesRef(term), v, payload); + } + + public TermFreqPayload(BytesRef term, long v, BytesRef payload) { + this.term = term; + this.v = v; + this.payload = payload; + } +} \ No newline at end of file Property changes on: lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayload.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java (revision 0) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java (working copy) @@ -0,0 +1,72 @@ +package org.apache.lucene.search.suggest; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; + +import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; +import org.apache.lucene.util.BytesRef; + +/** + * A {@link TermFreqPayloadIterator} over a sequence of {@link TermFreqPayload}s. + */ +public final class TermFreqPayloadArrayIterator implements TermFreqPayloadIterator { + private final Iterator i; + private TermFreqPayload current; + private final BytesRef spare = new BytesRef(); + + public TermFreqPayloadArrayIterator(Iterator i) { + this.i = i; + } + + public TermFreqPayloadArrayIterator(TermFreqPayload[] i) { + this(Arrays.asList(i)); + } + + public TermFreqPayloadArrayIterator(Iterable i) { + this(i.iterator()); + } + + @Override + public long weight() { + return current.v; + } + + @Override + public BytesRef next() { + if (i.hasNext()) { + current = i.next(); + spare.copyBytes(current.term); + return spare; + } + return null; + } + + @Override + public BytesRef payload() { + return current.payload; + } + + @Override + public Comparator getComparator() { + return null; + } +} \ No newline at end of file Property changes on: lucene/suggest/src/test/org/apache/lucene/search/suggest/TermFreqPayloadArrayIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (revision 1455339) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (working copy) @@ -53,6 +53,8 @@ import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.TermFreq; import 
org.apache.lucene.search.suggest.TermFreqArrayIterator; +import org.apache.lucene.search.suggest.TermFreqPayload; +import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -103,6 +105,56 @@ assertEquals(6, results.get(2).value, 0.01F); } + public void testKeywordWithPayloads() throws Exception { + TermFreqPayload keys[] = new TermFreqPayload[] { + new TermFreqPayload("foo", 50, new BytesRef("hello")), + new TermFreqPayload("bar", 10, new BytesRef("goodbye")), + new TermFreqPayload("barbar", 12, new BytesRef("thank you")), + new TermFreqPayload("barbara", 6, new BytesRef("for all the fish")) + }; + + AnalyzingSuggester suggester = new AnalyzingSuggester(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)); + suggester.build(new TermFreqPayloadArrayIterator(keys)); + + // top N of 2, but only foo is available + List results = suggester.lookup(_TestUtil.stringToCharSequence("f", random()), false, 2); + assertEquals(1, results.size()); + assertEquals("foo", results.get(0).key.toString()); + assertEquals(50, results.get(0).value, 0.01F); + assertEquals(new BytesRef("hello"), results.get(0).payload); + + // top N of 1 for 'bar': we return this even though + // barbar is higher because exactFirst is enabled: + results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random()), false, 1); + assertEquals(1, results.size()); + assertEquals("bar", results.get(0).key.toString()); + assertEquals(10, results.get(0).value, 0.01F); + assertEquals(new BytesRef("goodbye"), results.get(0).payload); + + // top N Of 2 for 'b' + results = suggester.lookup(_TestUtil.stringToCharSequence("b", random()), false, 2); + assertEquals(2, results.size()); + assertEquals("barbar", results.get(0).key.toString()); + assertEquals(12, results.get(0).value, 0.01F); + assertEquals(new BytesRef("thank you"), results.get(0).payload); + 
assertEquals("bar", results.get(1).key.toString()); + assertEquals(10, results.get(1).value, 0.01F); + assertEquals(new BytesRef("goodbye"), results.get(1).payload); + + // top N of 3 for 'ba' + results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random()), false, 3); + assertEquals(3, results.size()); + assertEquals("barbar", results.get(0).key.toString()); + assertEquals(12, results.get(0).value, 0.01F); + assertEquals(new BytesRef("thank you"), results.get(0).payload); + assertEquals("bar", results.get(1).key.toString()); + assertEquals(10, results.get(1).value, 0.01F); + assertEquals(new BytesRef("goodbye"), results.get(1).payload); + assertEquals("barbara", results.get(2).key.toString()); + assertEquals(6, results.get(2).value, 0.01F); + assertEquals(new BytesRef("for all the fish"), results.get(2).payload); + } + // TODO: more tests /** * basic "standardanalyzer" test with stopword removal @@ -435,11 +487,13 @@ public final String surfaceForm; public final String analyzedForm; public final long weight; + public final BytesRef payload; - public TermFreq2(String surfaceForm, String analyzedForm, long weight) { + public TermFreq2(String surfaceForm, String analyzedForm, long weight, BytesRef payload) { this.surfaceForm = surfaceForm; this.analyzedForm = analyzedForm; this.weight = weight; + this.payload = payload; } @Override @@ -549,8 +603,16 @@ final TreeSet allPrefixes = new TreeSet(); final Set seen = new HashSet(); - TermFreq[] keys = new TermFreq[numQueries]; + boolean doPayloads = random().nextBoolean(); + TermFreq[] keys = null; + TermFreqPayload[] payloadKeys = null; + if (doPayloads) { + payloadKeys = new TermFreqPayload[numQueries]; + } else { + keys = new TermFreq[numQueries]; + } + boolean preserveSep = random().nextBoolean(); final int numStopChars = random().nextInt(10); @@ -614,9 +676,18 @@ } // we can probably do Integer.MAX_VALUE here, but why worry. 
int weight = random().nextInt(1<<24); - keys[i] = new TermFreq(key, weight); + BytesRef payload; + if (doPayloads) { + byte[] bytes = new byte[random().nextInt(10)]; + random().nextBytes(bytes); + payload = new BytesRef(bytes); + payloadKeys[i] = new TermFreqPayload(key, weight, payload); + } else { + keys[i] = new TermFreq(key, weight); + payload = null; + } - slowCompletor.add(new TermFreq2(key, analyzedKey, weight)); + slowCompletor.add(new TermFreq2(key, analyzedKey, weight, payload)); } if (VERBOSE) { @@ -632,7 +703,11 @@ Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles); AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1); - suggester.build(new TermFreqArrayIterator(keys)); + if (doPayloads) { + suggester.build(new TermFreqPayloadArrayIterator(payloadKeys)); + } else { + suggester.build(new TermFreqArrayIterator(keys)); + } for (String prefix : allPrefixes) { @@ -739,6 +814,9 @@ //System.out.println(" check hit " + hit); assertEquals(matches.get(hit).surfaceForm.toString(), r.get(hit).key.toString()); assertEquals(matches.get(hit).weight, r.get(hit).value, 0f); + if (doPayloads) { + assertEquals(matches.get(hit).payload, r.get(hit).payload); + } } } } Index: lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java (revision 0) +++ lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java (working copy) @@ -0,0 +1,36 @@ +package org.apache.lucene.search.spell; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs +import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester; // javadocs +import org.apache.lucene.search.suggest.analyzing.FuzzySuggester; // javadocs +import org.apache.lucene.util.BytesRef; + +/** + * Interface for enumerating term,weight,payload triples; + * currently only {@link AnalyzingSuggester} and {@link + * FuzzySuggester} support payloads. + */ +public interface TermFreqPayloadIterator extends TermFreqIterator { + + /** An arbitrary byte[] to record per suggestion. See + * {@link LookupResult#payload} to retrieve the payload + * for each suggestion. 
*/ + public BytesRef payload(); +} Property changes on: lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqPayloadIterator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (revision 1455339) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (working copy) @@ -25,9 +25,10 @@ import java.util.List; import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Sort.SortInfo; import org.apache.lucene.search.suggest.Sort; -import org.apache.lucene.search.suggest.Sort.SortInfo; import org.apache.lucene.search.suggest.fst.FSTCompletion.Completion; import org.apache.lucene.search.suggest.tst.TSTLookup; import org.apache.lucene.store.ByteArrayDataInput; @@ -141,6 +142,9 @@ @Override public void build(TermFreqIterator tfit) throws IOException { + if (tfit instanceof TermFreqPayloadIterator) { + throw new IllegalArgumentException("this suggester doesn't support payloads"); + } File tempInput = File.createTempFile( FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir()); File tempSorted = File.createTempFile( Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (revision 1455339) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (working copy) @@ -26,9 +26,10 @@ import 
java.util.List; import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; -import org.apache.lucene.search.suggest.Sort.ByteSequencesWriter; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.InputStreamDataInput; @@ -40,12 +41,12 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; -import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FST.Arc; import org.apache.lucene.util.fst.FST.BytesReader; +import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.PositiveIntOutputs; +import org.apache.lucene.util.fst.Util.MinResult; import org.apache.lucene.util.fst.Util; -import org.apache.lucene.util.fst.Util.MinResult; /** * Suggester based on a weighted FST: it first traverses the prefix, @@ -93,6 +94,9 @@ @Override public void build(TermFreqIterator iterator) throws IOException { + if (iterator instanceof TermFreqPayloadIterator) { + throw new IllegalArgumentException("this suggester doesn't support payloads"); + } BytesRef scratch = new BytesRef(); TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator); IntsRef scratchInts = new IntsRef(); Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (revision 1455339) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.TokenStreamToAutomaton; import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; import org.apache.lucene.store.ByteArrayDataInput; @@ -180,6 +181,10 @@ * graphs this will always be 1. */ private int maxAnalyzedPathsForOneInput; + private boolean hasPayloads; + + private static final int PAYLOAD_SEP = '\u001f'; + /** * Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int) * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | @@ -330,8 +335,15 @@ return new TokenStreamToAutomaton(); } } + + private static class AnalyzingComparator implements Comparator { - private Comparator sortComparator = new Comparator() { + private final boolean hasPayloads; + + public AnalyzingComparator(boolean hasPayloads) { + this.hasPayloads = hasPayloads; + } + private final ByteArrayDataInput readerA = new ByteArrayDataInput(); private final ByteArrayDataInput readerB = new ByteArrayDataInput(); private final BytesRef scratchA = new BytesRef(); @@ -367,10 +379,19 @@ } // Finally by surface form: - scratchA.offset = readerA.getPosition(); - scratchA.length = a.length - scratchA.offset; - scratchB.offset = readerB.getPosition(); - scratchB.length = b.length - scratchB.offset; + if (hasPayloads) { + readerA.setPosition(readerA.getPosition() + scratchA.length); + scratchA.length = readerA.readShort(); + scratchA.offset = readerA.getPosition(); + readerB.setPosition(readerB.getPosition() + scratchB.length); + scratchB.length = readerB.readShort(); + scratchB.offset = readerB.getPosition(); + } else { + scratchA.offset = readerA.getPosition(); + scratchA.length = a.length - scratchA.offset; + scratchB.offset = readerB.getPosition(); + scratchB.length = b.length - scratchB.offset; + } cmp = scratchA.compareTo(scratchB); if (cmp != 0) { @@ -380,21 +401,28 @@ return 0; } }; - + @Override public void 
build(TermFreqIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); File directory = Sort.defaultTempDir(); File tempInput = File.createTempFile(prefix, ".input", directory); File tempSorted = File.createTempFile(prefix, ".sorted", directory); - + + TermFreqPayloadIterator payloads; + if (iterator instanceof TermFreqPayloadIterator) { + payloads = (TermFreqPayloadIterator) iterator; + } else { + payloads = null; + } + hasPayloads = payloads != null; + Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput); Sort.ByteSequencesReader reader = null; BytesRef scratch = new BytesRef(); TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); - // analyzed sequence + 0(byte) + weight(int) + surface + analyzedLength(short) boolean success = false; byte buffer[] = new byte[8]; try { @@ -419,6 +447,19 @@ // compute the required length: // analyzed sequence + weight (4) + surface + analyzedLength (short) int requiredLength = analyzedLength + 4 + surfaceForm.length + 2; + + BytesRef payload; + + if (hasPayloads) { + if (surfaceForm.length > (Short.MAX_VALUE-2)) { + throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")"); + } + payload = payloads.payload(); + // payload + surfaceLength (short) + requiredLength += payload.length + 2; + } else { + payload = null; + } buffer = ArrayUtil.grow(buffer, requiredLength); @@ -430,7 +471,18 @@ output.writeInt(encodeWeight(iterator.weight())); - output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); + if (hasPayloads) { + for(int i=0;i " + cost + ": " + surface.utf8ToString()); - builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface))); + if (!hasPayloads) { + builder.add(scratchInts, outputs.newPair(cost, BytesRef.deepCopyOf(surface))); + } else { + int payloadOffset = input.getPosition() + surface.length; + int payloadLength = scratch.length - payloadOffset; + 
BytesRef br = new BytesRef(surface.length + 1 + payloadLength); + System.arraycopy(surface.bytes, surface.offset, br.bytes, 0, surface.length); + br.bytes[surface.length] = PAYLOAD_SEP; + System.arraycopy(scratch.bytes, payloadOffset, br.bytes, surface.length+1, payloadLength); + br.length = br.bytes.length; + builder.add(scratchInts, outputs.newPair(cost, br)); + } } fst = builder.finish(); @@ -542,6 +610,7 @@ fst.save(dataOut); dataOut.writeVInt(maxAnalyzedPathsForOneInput); + dataOut.writeByte((byte) (hasPayloads ? 1 : 0)); } finally { IOUtils.close(output); } @@ -554,12 +623,58 @@ try { this.fst = new FST>(dataIn, new PairOutputs(PositiveIntOutputs.getSingleton(true), ByteSequenceOutputs.getSingleton())); maxAnalyzedPathsForOneInput = dataIn.readVInt(); + hasPayloads = dataIn.readByte() == 1; } finally { IOUtils.close(input); } return true; } + private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRef spare) { + LookupResult result; + if (hasPayloads) { + int sepIndex = -1; + for(int i=0;i= output2.length) { + return false; + } + for(int i=0;i lookup(final CharSequence key, boolean onlyMorePopular, int num) { assert num > 0; @@ -639,10 +754,9 @@ // nodes we have and the // maxSurfaceFormsPerAnalyzedForm: for(MinResult> completion : completions) { - if (utf8Key.bytesEquals(completion.output.output2)) { - spare.grow(completion.output.output2.length); - UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); - results.add(new LookupResult(spare.toString(), decodeWeight(completion.output.output1))); + BytesRef output2 = completion.output.output2; + if (sameSurfaceForm(utf8Key, output2)) { + results.add(getLookupResult(completion.output.output1, output2, spare)); break; } } @@ -676,7 +790,7 @@ // In exactFirst mode, don't accept any paths // matching the surface form since that will // create duplicate results: - if (utf8Key.bytesEquals(output.output2)) { + if (sameSurfaceForm(utf8Key, output.output2)) { // We found exact match, which means we 
should // have already found it in the first search: assert results.size() == 1; @@ -697,10 +811,9 @@ MinResult> completions[] = searcher.search(); for(MinResult> completion : completions) { - spare.grow(completion.output.output2.length); - UnicodeUtil.UTF8toUTF16(completion.output.output2, spare); - LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1)); + LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare); + // TODO: for fuzzy case would be nice to return // how many edits were required Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (revision 1455339) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (working copy) @@ -25,9 +25,10 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.SortedTermFreqIteratorWrapper; -import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.IOUtils; @@ -51,6 +52,9 @@ @Override public void build(TermFreqIterator tfit) throws IOException { + if (tfit instanceof TermFreqPayloadIterator) { + throw new IllegalArgumentException("this suggester doesn't support payloads"); + } root = new TernaryTreeNode(); // buffer first if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) { Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (revision 1455339) +++ 
lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.search.spell.Dictionary; import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.PriorityQueue; @@ -39,17 +40,29 @@ public static final class LookupResult implements Comparable { /** the key's text */ public final CharSequence key; + /** the key's weight */ public final long value; + + /** the key's payload (null if not present) */ + public final BytesRef payload; /** * Create a new result from a key+weight pair. */ public LookupResult(CharSequence key, long value) { + this(key, value, null); + } + + /** + * Create a new result from a key+weight+payload triple. + */ + public LookupResult(CharSequence key, long value, BytesRef payload) { this.key = key; this.value = value; + this.payload = payload; } - + @Override public String toString() { return key + "/" + value; Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (revision 1455339) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (working copy) @@ -26,6 +26,7 @@ import java.util.List; import org.apache.lucene.search.spell.TermFreqIterator; +import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.UnsortedTermFreqIteratorWrapper; import org.apache.lucene.search.suggest.jaspell.JaspellTernarySearchTrie.TSTNode; @@ -53,6 +54,9 @@ @Override public void build(TermFreqIterator tfit) throws IOException { + if (tfit instanceof TermFreqPayloadIterator) { + throw new IllegalArgumentException("this suggester doesn't support payloads"); + } if 
(tfit.getComparator() != null) { // make sure it's unsorted // WTF - this could result in yet another sorted iteration.... Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1455339) +++ lucene/CHANGES.txt (working copy) @@ -55,6 +55,9 @@ query "i " will no longer suggest "Isla de Muerta" for example. (Mike McCandless) +* LUCENE-4820: Add payloads to Analyzing/FuzzySuggester, to record an + arbitrary byte[] per suggestion (Mike McCandless) + ======================= Lucene 4.2.0 ======================= Changes in backwards compatibility policy