Index: lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java =================================================================== --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (revision 1400186) +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (working copy) @@ -803,4 +803,69 @@ List results = suggester.lookup("a", false, 4); } + + public void testExactFirstMissingResult() throws Exception { + + Analyzer a = new MockAnalyzer(random()); + + AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, AnalyzingSuggester.EXACT_FIRST, 256, -1); + + suggester.build(new TermFreqArrayIterator(new TermFreq[] { + new TermFreq("a", 5), + new TermFreq("a b", 3), + new TermFreq("a c", 4), + })); + + // nocommit: change the true in the lookup call below to false and + // confirm this test still failed before the fix: + List results = suggester.lookup("a", true, 3); + assertEquals(3, results.size()); + assertEquals("a", results.get(0).key); + assertEquals(5, results.get(0).value); + assertEquals("a c", results.get(1).key); + assertEquals(4, results.get(1).value); + assertEquals("a b", results.get(2).key); + assertEquals(3, results.get(2).value); + } + + public void testDupSurfaceFormsMissingResults() throws Exception { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true); + + return new TokenStreamComponents(tokenizer) { + + int count; + + @Override + public TokenStream getTokenStream() { + return new CannedTokenStream(new Token[] { + token("hairy", 1, 1), + token("smelly", 0, 1), + token("dog", 1, 1), + }); + } + + @Override + protected void setReader(final Reader reader) throws IOException { + } + }; + } + }; + + AnalyzingSuggester suggester = new AnalyzingSuggester(a, a, 0, 256, -1); + + suggester.build(new 
TermFreqArrayIterator(new TermFreq[] { + new TermFreq("hambone", 6), + new TermFreq("nellie", 5), + })); + + List results = suggester.lookup("nellie", false, 2); + assertEquals(2, results.size()); + assertEquals("hambone", results.get(0).key); + assertEquals(6, results.get(0).value); + assertEquals("nellie", results.get(1).key); + assertEquals(5, results.get(1).value); + } } Index: lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java =================================================================== --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (revision 1400186) +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (working copy) @@ -591,16 +591,18 @@ Util.TopNSearcher> searcher; searcher = new Util.TopNSearcher>(fst, - num - results.size(), + num, weightComparator) { private final Set seen = new HashSet(); @Override protected boolean acceptResult(IntsRef input, Pair output) { - + + //System.out.println("ACCEPT? 
path=" + input); // Dedup: when the input analyzes to a graph we // can get duplicate surface forms: if (seen.contains(output.output2)) { + //System.out.println("SKIP: dup"); return false; } seen.add(output.output2); @@ -630,6 +632,12 @@ LookupResult result = new LookupResult(spare.toString(), decodeWeight(completion.output.output1)); //System.out.println(" result=" + result); results.add(result); + + if (results.size() == num) { + // In the exactFirst=true case the search may + // produce one extra path + break; + } } return results; Index: lucene/core/src/java/org/apache/lucene/util/fst/Util.java =================================================================== --- lucene/core/src/java/org/apache/lucene/util/fst/Util.java (revision 1400186) +++ lucene/core/src/java/org/apache/lucene/util/fst/Util.java (working copy) @@ -325,7 +325,7 @@ if (queue.size() == topN+1) { queue.pollLast(); - } + } } /** Adds all leaving arcs, including 'finished' arc, if @@ -390,8 +390,6 @@ break; } - //System.out.println(" remove init path=" + path); - if (path.arc.label == FST.END_LABEL) { //System.out.println(" empty string! cost=" + path.cost); // Empty string! @@ -400,10 +398,13 @@ continue; } + // LUCENE-4481: TODO: re-enable this pruning if we can make this admissible: + /* if (results.size() == topN-1) { // Last path -- don't bother w/ queue anymore: queue = null; } + */ //System.out.println(" path: " + path);