Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (revision 1506631) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (working copy) @@ -34,6 +34,29 @@ * @lucene.experimental */ public abstract class Lookup { + + /** Used to return highlighted result; see {@link + * LookupResult#highlightKey} */ + public static final class LookupHighlightFragment { + /** Portion of text for this fragment. */ + public final String text; + + /** True if this text matched a part of the user's + * query. */ + public final boolean isHit; + + /** Sole constructor. */ + public LookupHighlightFragment(String text, boolean isHit) { + this.text = text; + this.isHit = isHit; + } + + @Override + public String toString() { + return "LookupHighlightFragment(text=" + text + " isHit=" + isHit + ")"; + } + } + /** * Result of a lookup. */ @@ -41,6 +64,9 @@ /** the key's text */ public final CharSequence key; + /** Optional: the key's text, after highlighting. */ + public final LookupHighlightFragment[] highlightKey; + /** the key's weight */ public final long value; @@ -59,10 +85,21 @@ */ public LookupResult(CharSequence key, long value, BytesRef payload) { this.key = key; + this.highlightKey = null; this.value = value; this.payload = payload; } + /** + * Create a new result from a key+highlightKey+weight+payload quadruple. 
+ */ + public LookupResult(CharSequence key, LookupHighlightFragment[] highlightKey, long value, BytesRef payload) { + this.key = key; + this.highlightKey = highlightKey; + this.value = value; + this.payload = payload; + } + @Override public String toString() { return key + "/" + value; Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (revision 1506631) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (working copy) @@ -68,6 +68,7 @@ import org.apache.lucene.search.spell.TermFreqIterator; import org.apache.lucene.search.spell.TermFreqPayloadIterator; import org.apache.lucene.search.suggest.Lookup; +import org.apache.lucene.search.suggest.Lookup.LookupHighlightFragment; // javadocs import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; @@ -422,9 +423,6 @@ ScoreDoc sd = hits.scoreDocs[i]; textDV.get(sd.doc, scratch); String text = scratch.utf8ToString(); - if (doHighlight) { - text = highlight(text, matchedTokens, prefixToken); - } long score = weightsDV.get(sd.doc); BytesRef payload; @@ -435,7 +433,14 @@ payload = null; } - results.add(new LookupResult(text, score, payload)); + LookupResult result; + + if (doHighlight) { + result = new LookupResult(text, highlight(text, matchedTokens, prefixToken), score, payload); + } else { + result = new LookupResult(text, score, payload); + } + results.add(result); } //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest"); //System.out.println(results); @@ -451,19 +456,19 @@ return in; } - private String highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException { + private LookupHighlightFragment[] highlight(String text, Set<String> matchedTokens, 
String prefixToken) throws IOException { TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text)); CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); ts.reset(); - StringBuilder sb = new StringBuilder(); + List<LookupHighlightFragment> fragments = new ArrayList<LookupHighlightFragment>(); int upto = 0; while (ts.incrementToken()) { String token = termAtt.toString(); int startOffset = offsetAtt.startOffset(); int endOffset = offsetAtt.endOffset(); if (upto < startOffset) { - sb.append(text.substring(upto, startOffset)); + addNonMatch(fragments, text.substring(upto, startOffset)); upto = startOffset; } else if (upto > startOffset) { continue; @@ -471,51 +476,60 @@ if (matchedTokens.contains(token)) { // Token matches. - addWholeMatch(sb, text.substring(startOffset, endOffset), token); + addWholeMatch(fragments, text.substring(startOffset, endOffset), token); upto = endOffset; } else if (prefixToken != null && token.startsWith(prefixToken)) { - addPrefixMatch(sb, text.substring(startOffset, endOffset), token, prefixToken); + addPrefixMatch(fragments, text.substring(startOffset, endOffset), token, prefixToken); upto = endOffset; } } ts.end(); int endOffset = offsetAtt.endOffset(); if (upto < endOffset) { - sb.append(text.substring(upto)); + addNonMatch(fragments, text.substring(upto)); } ts.close(); - return sb.toString(); + return fragments.toArray(new LookupHighlightFragment[fragments.size()]); } - /** Appends the whole matched token to the provided {@code - * StringBuilder}. */ - protected void addWholeMatch(StringBuilder sb, String surface, String analyzed) { - sb.append("<b>"); - sb.append(surface); - sb.append("</b>"); + /** Called while highlighting a single result, to append a + * non-matching chunk of text from the suggestion to the + * provided fragments list. 
+ * @param fragments List of {@link LookupHighlightFragment} to add to + * @param text The text chunk to add + */ + protected void addNonMatch(List<LookupHighlightFragment> fragments, String text) { + fragments.add(new LookupHighlightFragment(text, false)); } - /** Append a matched prefix token, to the provided - * {@code StringBuilder}. - * @param sb {@code StringBuilder} to append to + /** Called while highlighting a single result, to append + * the whole matched token to the provided fragments list. + * @param fragments List of {@link LookupHighlightFragment} to add to + * @param surface The surface form (original) text + * @param analyzed The analyzed token corresponding to the surface form text + */ + protected void addWholeMatch(List<LookupHighlightFragment> fragments, String surface, String analyzed) { + fragments.add(new LookupHighlightFragment(surface, true)); + } + + /** Called while highlighting a single result, to append a + * matched prefix token, to the provided fragments list. + * @param fragments List of {@link LookupHighlightFragment} to add to * @param surface The fragment of the surface form * (indexed during {@link #build}, corresponding to * this match * @param analyzed The analyzed token that matched * @param prefixToken The prefix of the token that matched */ - protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) { + protected void addPrefixMatch(List<LookupHighlightFragment> fragments, String surface, String analyzed, String prefixToken) { // TODO: apps can try to invert their analysis logic // here, e.g. 
downcase the two before checking prefix: - sb.append("<b>"); - if (surface.startsWith(prefixToken)) { - sb.append(surface.substring(0, prefixToken.length())); - sb.append("</b>"); - sb.append(surface.substring(prefixToken.length())); + if (prefixToken.length() < surface.length()) { + fragments.add(new LookupHighlightFragment(surface.substring(0, prefixToken.length()), true)); + fragments.add(new LookupHighlightFragment(surface.substring(prefixToken.length()), false)); } else { - sb.append(surface); - sb.append("</b>"); + fragments.add(new LookupHighlightFragment(surface, true)); } } Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (revision 1506631) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.suggest.Lookup.LookupHighlightFragment; import org.apache.lucene.search.suggest.Lookup.LookupResult; import org.apache.lucene.search.suggest.TermFreqPayload; import org.apache.lucene.search.suggest.TermFreqPayloadArrayIterator; @@ -62,29 +63,29 @@ List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true); assertEquals(2, results.size()); - assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key); + assertEquals("a penny saved is a penny <b>ear</b>ned", toString(results.get(0).highlightKey)); assertEquals(10, results.get(0).value); assertEquals(new BytesRef("foobaz"), results.get(0).payload); - assertEquals("lend me your <b>ear</b>", results.get(1).key); + assertEquals("lend me your <b>ear</b>", toString(results.get(1).highlightKey)); assertEquals(8, results.get(1).value); 
assertEquals(new BytesRef("foobar"), results.get(1).payload); results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("lend me your <b>ear</b>", results.get(0).key); + assertEquals("lend me your <b>ear</b>", toString(results.get(0).highlightKey)); assertEquals(8, results.get(0).value); assertEquals(new BytesRef("foobar"), results.get(0).payload); results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key); + assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", toString(results.get(0).highlightKey)); assertEquals(10, results.get(0).value); assertEquals(new BytesRef("foobaz"), results.get(0).payload); results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key); + assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", toString(results.get(0).highlightKey)); assertEquals(10, results.get(0).value); assertEquals(new BytesRef("foobaz"), results.get(0).payload); @@ -117,12 +118,27 @@ }; List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, true); assertEquals(2, results.size()); - assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key); + assertEquals("a penny saved is a penny <b>ear</b>ned", toString(results.get(0).highlightKey)); assertEquals(10, results.get(0).value); assertEquals(new BytesRef("foobaz"), results.get(0).payload); suggester.close(); } + public String toString(LookupHighlightFragment[] fragments) { + StringBuilder sb = new StringBuilder(); + for(LookupHighlightFragment fragment : fragments) { + if (fragment.isHit) { + sb.append("<b>"); + } + sb.append(fragment.text); + if (fragment.isHit) { + sb.append("</b>"); + } + } + + return sb.toString(); + } + public void testRandomMinPrefixLength() throws 
Exception { TermFreqPayload keys[] = new TermFreqPayload[] { new TermFreqPayload("lend me your ear", 8, new BytesRef("foobar")), @@ -148,13 +164,13 @@ List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("ear", random()), 10, true, doHighlight); assertEquals(2, results.size()); if (doHighlight) { - assertEquals("a penny saved is a penny <b>ear</b>ned", results.get(0).key); + assertEquals("a penny saved is a penny <b>ear</b>ned", toString(results.get(0).highlightKey)); } else { assertEquals("a penny saved is a penny earned", results.get(0).key); } assertEquals(10, results.get(0).value); if (doHighlight) { - assertEquals("lend me your <b>ear</b>", results.get(1).key); + assertEquals("lend me your <b>ear</b>", toString(results.get(1).highlightKey)); } else { assertEquals("lend me your ear", results.get(1).key); } @@ -165,7 +181,7 @@ results = suggester.lookup(_TestUtil.stringToCharSequence("ear ", random()), 10, true, doHighlight); assertEquals(1, results.size()); if (doHighlight) { - assertEquals("lend me your <b>ear</b>", results.get(0).key); + assertEquals("lend me your <b>ear</b>", toString(results.get(0).highlightKey)); } else { assertEquals("lend me your ear", results.get(0).key); } @@ -175,7 +191,7 @@ results = suggester.lookup(_TestUtil.stringToCharSequence("pen", random()), 10, true, doHighlight); assertEquals(1, results.size()); if (doHighlight) { - assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", results.get(0).key); + assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", toString(results.get(0).highlightKey)); } else { assertEquals("a penny saved is a penny earned", results.get(0).key); } @@ -185,7 +201,7 @@ results = suggester.lookup(_TestUtil.stringToCharSequence("p", random()), 10, true, doHighlight); assertEquals(1, results.size()); if (doHighlight) { - assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", results.get(0).key); + assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", toString(results.get(0).highlightKey)); } else { assertEquals("a penny saved is a penny earned", results.get(0).key); } @@ 
-222,7 +238,7 @@ suggester.build(new TermFreqPayloadArrayIterator(keys)); List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", results.get(0).key); + assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", toString(results.get(0).highlightKey)); suggester.close(); } @@ -243,23 +259,20 @@ suggester.build(new TermFreqPayloadArrayIterator(keys)); List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("a <b>Penny</b> saved is a <b>penn</b>y earned", results.get(0).key); + assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", toString(results.get(0).highlightKey)); suggester.close(); // Try again, but overriding addPrefixMatch to normalize case: suggester = new AnalyzingInfixSuggester(TEST_VERSION_CURRENT, tempDir, a, a, 3) { @Override - protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) { + protected void addPrefixMatch(List<LookupHighlightFragment> fragments, String surface, String analyzed, String prefixToken) { prefixToken = prefixToken.toLowerCase(Locale.ROOT); String surfaceLower = surface.toLowerCase(Locale.ROOT); - sb.append("<b>"); if (surfaceLower.startsWith(prefixToken)) { - sb.append(surface.substring(0, prefixToken.length())); - sb.append("</b>"); - sb.append(surface.substring(prefixToken.length())); + fragments.add(new LookupHighlightFragment(surface.substring(0, prefixToken.length()), true)); + fragments.add(new LookupHighlightFragment(surface.substring(prefixToken.length()), false)); } else { - sb.append(surface); - sb.append("</b>"); + fragments.add(new LookupHighlightFragment(surface, true)); } } @@ -271,7 +284,7 @@ suggester.build(new TermFreqPayloadArrayIterator(keys)); results = suggester.lookup(_TestUtil.stringToCharSequence("penn", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", 
results.get(0).key); + assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", toString(results.get(0).highlightKey)); suggester.close(); } @@ -355,7 +368,7 @@ suggester.build(new TermFreqPayloadArrayIterator(keys)); List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("a", random()), 10, true, true); assertEquals(1, results.size()); - assertEquals("<b>a</b> bob for <b>a</b>pples", results.get(0).key); + assertEquals("<b>a</b> bob for <b>a</b>pples", toString(results.get(0).highlightKey)); suggester.close(); } }