Index: src/java/org/apache/lucene/search/FilteredTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 764880) +++ src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy) @@ -26,9 +26,12 @@
Term enumerations are always ordered by Term.compareTo(). Each term in
the enumeration is greater than all that precede it. */
public abstract class FilteredTermEnum extends TermEnum {
- private Term currentTerm = null;
- private TermEnum actualEnum = null;
+ /** the current term */
+ protected Term currentTerm = null;
+ /** the delegate enum - to set this member use {@link #setEnum} */
+ protected TermEnum actualEnum = null;
+
public FilteredTermEnum() {}
/** Equality compare on the term */
@@ -40,6 +43,10 @@
/** Indicates the end of the enumeration has been reached */
protected abstract boolean endEnum();
+ /**
+ * Use this method to set the actual TermEnum (e.g. in the constructor);
+ * it will be automatically positioned on the first matching term.
+ */
protected void setEnum(TermEnum actualEnum) throws IOException {
this.actualEnum = actualEnum;
// Find the first term that matches
@@ -54,7 +61,8 @@
* Returns -1 if no Term matches or all terms have been enumerated.
*/
public int docFreq() {
- if (actualEnum == null) return -1;
+ if (currentTerm == null) return -1;
+ assert actualEnum != null;
return actualEnum.docFreq();
}
@@ -85,7 +93,7 @@
/** Closes the enumeration to further activity, freeing resources. */
public void close() throws IOException {
- actualEnum.close();
+ if (actualEnum != null) actualEnum.close();
currentTerm = null;
actualEnum = null;
}
Index: src/java/org/apache/lucene/search/MultiTermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 764880)
+++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy)
@@ -45,6 +45,7 @@
public abstract class MultiTermQuery extends Query {
protected Term term;
protected boolean constantScoreRewrite = false;
+ transient int numberOfTerms = 0;
/** Constructs a query for terms matching term. */
public MultiTermQuery(Term term) {
@@ -67,6 +68,33 @@
protected abstract FilteredTermEnum getEnum(IndexReader reader)
throws IOException;
+ /**
+ * Expert: Return the number of unique terms visited during execution of the query.
+ * If there are many of them, you may consider using another query type
+ * or optimize your total term count in index.
+ *
This method is not thread safe, be sure to only call it when no query is running! + * To get correct numbers, be sure to reset the term counter before the query + * with {@link #clearTotalNumberOfTerms}. + *
On optimized indexes / no MultiReaders, you get the correct number of + * unique terms for the whole index. Use this number to compare different queries. + * For non-optimized indexes this number can also be achieved in + * non-constant-score mode. In constant-score mode you get the total number of + * terms seeked for all segments / sub-readers. + * @see #clearTotalNumberOfTerms + */ + public int getTotalNumberOfTerms() { + return numberOfTerms; + } + + /** + * Expert: Resets the counting of unique terms. + * Do this before executing the query/filter. + * @see #getTotalNumberOfTerms + */ + public void clearTotalNumberOfTerms() { + numberOfTerms = 0; + } + protected Filter getFilter() { return new MultiTermFilter(this); } @@ -79,6 +107,7 @@ do { Term t = enumerator.term(); if (t != null) { + numberOfTerms++; TermQuery tq = new TermQuery(t); // found a match tq.setBoost(getBoost() * enumerator.difference()); // set the boost query.add(tq, BooleanClause.Occur.SHOULD); // add to query @@ -150,14 +179,14 @@ MultiTermQuery mtq; abstract class TermGenerator { - public void generate(IndexReader reader) throws IOException { - TermEnum enumerator = mtq.getEnum(reader); + public void generate(IndexReader reader, TermEnum enumerator) throws IOException { TermDocs termDocs = reader.termDocs(); try { do { Term term = enumerator.term(); if (term == null) break; + mtq.numberOfTerms++; termDocs.seek(term); while (termDocs.next()) { handleDoc(termDocs.doc()); @@ -165,7 +194,6 @@ } while (enumerator.next()); } finally { termDocs.close(); - enumerator.close(); } } abstract public void handleDoc(int doc); @@ -176,28 +204,40 @@ } public BitSet bits(IndexReader reader) throws IOException { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; + final TermEnum enumerator = mtq.getEnum(reader); + try { + final BitSet bitSet = new BitSet(reader.maxDoc()); + new 
TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } } public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - - return bitSet; + final TermEnum enumerator = mtq.getEnum(reader); + try { + // if current term in enum is null, the enum is empty -> shortcut + if (enumerator.term() == null) + return DocIdSet.EMPTY_DOCIDSET; + // else fill into a OpenBitSet + final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } } public boolean equals(Object o) { - if (this == o) return true; if (!(o instanceof MultiTermFilter))