Index: src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 832854) +++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -19,10 +19,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.PriorityQueue; /** Implements the fuzzy search query. The similarity measurement * is based on the Levenshtein (edit distance) algorithm. @@ -132,40 +132,40 @@ return new TermQuery(term); } + int maxSize = BooleanQuery.getMaxClauseCount(); + PriorityQueue stQueue = new PriorityQueue(1024); FilteredTermEnum enumerator = getEnum(reader); - int maxClauseCount = BooleanQuery.getMaxClauseCount(); - ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); - ScoreTerm reusableST = null; - try { + ScoreTerm bottomSt = null; do { - float score = 0.0f; - Term t = enumerator.term(); - if (t != null) { - score = enumerator.difference(); - if (reusableST == null) { - reusableST = new ScoreTerm(t, score); - } else if (score >= reusableST.score) { - // reusableST holds the last "rejected" entry, so, if - // this new score is not better than that, there's no - // need to try inserting it - reusableST.score = score; - reusableST.term = t; - } else { - continue; + final Term t = enumerator.term(); + if (t == null) break; + ScoreTerm st = new ScoreTerm(t, enumerator.difference()); + if (stQueue.size() < maxSize) { + // record the current bottom item + if (bottomSt == null || st.compareTo(bottomSt) > 0) { + bottomSt = st; } - - reusableST = stQueue.insertWithOverflow(reusableST); + // add to PQ, as it is not yet filled up + stQueue.offer(st); + } else { + assert bottomSt != null; + // only add to PQ, if the ScoreTerm is greater than the current bottom, + // as all entries will be enqueued after the current bottom and will never be visible + if (st.compareTo(bottomSt) < 0) { + stQueue.offer(st); + } } + //System.out.println("current: "+st.term+"("+st.score+"), bottom: "+bottomSt.term+"("+bottomSt.score+")"); } while (enumerator.next()); } finally { enumerator.close(); } BooleanQuery query = new BooleanQuery(true); - int size = stQueue.size(); + int size = Math.min(stQueue.size(), maxSize); for(int i = 0; i < size; i++){ - ScoreTerm st = stQueue.pop(); + ScoreTerm st = stQueue.poll(); TermQuery tq = new TermQuery(st.term); // found a match tq.setBoost(getBoost() * st.score); // set the boost query.add(tq, BooleanClause.Occur.SHOULD); // add to query @@ -173,10 +173,28 @@ return query; } + + protected static class ScoreTerm implements Comparable { + public Term term; + public float score; + public ScoreTerm(Term term, float score){ + this.term = term; + this.score = score; + } + + public int compareTo(ScoreTerm other) { + if (this.score == other.score) + return this.term.compareTo(other.term); + else + // inverse ordering!!! + return Float.compare(other.score, this.score); + } + } + @Override public String toString(String field) { - StringBuilder buffer = new StringBuilder(); + final StringBuilder buffer = new StringBuilder(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); @@ -188,32 +206,6 @@ return buffer.toString(); } - protected static class ScoreTerm { - public Term term; - public float score; - - public ScoreTerm(Term term, float score){ - this.term = term; - this.score = score; - } - } - - protected static class ScoreTermQueue extends PriorityQueue { - - public ScoreTermQueue(int size){ - initialize(size); - } - - @Override - protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) { - if (termA.score == termB.score) - return termA.term.compareTo(termB.term) > 0; - else - return termA.score < termB.score; - } - - } - @Override public int hashCode() { final int prime = 31; Index: src/test/org/apache/lucene/search/TestFuzzyQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestFuzzyQuery.java (revision 832595) +++ src/test/org/apache/lucene/search/TestFuzzyQuery.java (working copy) @@ -17,6 +17,9 @@ * limitations under the License. */ +import java.util.Set; +import java.util.HashSet; +import java.util.Arrays; import java.io.IOException; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -76,6 +79,23 @@ query = new FuzzyQuery(new Term("field", "aaaaa"), FuzzyQuery.defaultMinSimilarity, 6); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(1, hits.length); + + // test BooleanQuery.maxClauseCount + int savedClauseCount = BooleanQuery.getMaxClauseCount(); + try { + BooleanQuery.setMaxClauseCount(2); + // This query would normally return 3 documents, because 3 terms match: + query = new FuzzyQuery(new Term("field", "aaaab"), FuzzyQuery.defaultMinSimilarity, 3); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("only 2 documents should match", 2, hits.length); + Set possibleTerms = new HashSet(Arrays.asList("aaaaa","aaaab")); + for (int i = 0; i < hits.length; i++) { + final String term = searcher.doc(hits[i].doc).get("field"); + assertTrue("term '" + term + "' should not appear in results", possibleTerms.contains(term)); + } + } finally { + BooleanQuery.setMaxClauseCount(savedClauseCount); + } // not similar enough: query = new FuzzyQuery(new Term("field", "xxxxx"), FuzzyQuery.defaultMinSimilarity, 0);