Index: src/java/org/apache/lucene/search/FilteredTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 764880) +++ src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy) @@ -26,9 +26,12 @@

Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than all that precede it. */ public abstract class FilteredTermEnum extends TermEnum { - private Term currentTerm = null; - private TermEnum actualEnum = null; + /** the current term */ + protected Term currentTerm = null; + /** the delegate enum - to set this member use {@link #setEnum} */ + protected TermEnum actualEnum = null; + public FilteredTermEnum() {} /** Equality compare on the term */ @@ -40,6 +43,10 @@ /** Indicates the end of the enumeration has been reached */ protected abstract boolean endEnum(); + /** + * use this method to set the actual TermEnum (e.g. in ctor), + * it will be automatically positioned on the first matching term. + */ protected void setEnum(TermEnum actualEnum) throws IOException { this.actualEnum = actualEnum; // Find the first term that matches @@ -54,7 +61,8 @@ * Returns -1 if no Term matches or all terms have been enumerated. */ public int docFreq() { - if (actualEnum == null) return -1; + if (currentTerm == null) return -1; + assert actualEnum != null; return actualEnum.docFreq(); } @@ -85,7 +93,7 @@ /** Closes the enumeration to further activity, freeing resources. */ public void close() throws IOException { - actualEnum.close(); + if (actualEnum != null) actualEnum.close(); currentTerm = null; actualEnum = null; } Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 764880) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -45,6 +45,7 @@ public abstract class MultiTermQuery extends Query { protected Term term; protected boolean constantScoreRewrite = false; + transient int numberOfTerms = 0; /** Constructs a query for terms matching term. 
*/ public MultiTermQuery(Term term) { @@ -67,6 +68,33 @@ protected abstract FilteredTermEnum getEnum(IndexReader reader) throws IOException; + /** + * Expert: Return the number of unique terms visited during execution of the query. + * If there are many of them, you may consider using another query type + * or reducing the total term count in your index. + *

This method is not thread safe, be sure to only call it when no query is running! + * To get correct numbers, be sure to reset the term counter before the query + * with {@link #clearTotalNumberOfTerms}. + *

On optimized indexes / no MultiReaders, you get the correct number of + * unique terms for the whole index. Use this number to compare different queries. + * For non-optimized indexes this number can also be achieved in + * non-constant-score mode. In constant-score mode you get the total number of + * terms seeked for all segments / sub-readers. + * @see #clearTotalNumberOfTerms + */ + public int getTotalNumberOfTerms() { + return numberOfTerms; + } + + /** + * Expert: Resets the counting of unique terms. + * Do this before executing the query/filter. + * @see #getTotalNumberOfTerms + */ + public void clearTotalNumberOfTerms() { + numberOfTerms = 0; + } + protected Filter getFilter() { return new MultiTermFilter(this); } @@ -79,6 +107,7 @@ do { Term t = enumerator.term(); if (t != null) { + numberOfTerms++; TermQuery tq = new TermQuery(t); // found a match tq.setBoost(getBoost() * enumerator.difference()); // set the boost query.add(tq, BooleanClause.Occur.SHOULD); // add to query @@ -150,14 +179,14 @@ MultiTermQuery mtq; abstract class TermGenerator { - public void generate(IndexReader reader) throws IOException { - TermEnum enumerator = mtq.getEnum(reader); + public void generate(IndexReader reader, TermEnum enumerator) throws IOException { TermDocs termDocs = reader.termDocs(); try { do { Term term = enumerator.term(); if (term == null) break; + mtq.numberOfTerms++; termDocs.seek(term); while (termDocs.next()) { handleDoc(termDocs.doc()); @@ -165,7 +194,6 @@ } while (enumerator.next()); } finally { termDocs.close(); - enumerator.close(); } } abstract public void handleDoc(int doc); @@ -176,28 +204,40 @@ } public BitSet bits(IndexReader reader) throws IOException { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + final TermEnum enumerator = mtq.getEnum(reader); + try { + final BitSet bitSet = new BitSet(reader.maxDoc()); + new 
TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } } public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - - return bitSet; + final TermEnum enumerator = mtq.getEnum(reader); + try { + // if current term in enum is null, the enum is empty -> shortcut + if (enumerator.term() == null) + return DocIdSet.EMPTY_DOCIDSET; + // else fill into a OpenBitSet + final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } } public boolean equals(Object o) { - if (this == o) return true; if (!(o instanceof MultiTermFilter))