Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 887236) +++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -18,6 +18,8 @@ */ import java.io.IOException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; @@ -25,121 +27,155 @@ /** * Abstract class for enumerating a subset of all terms. - * - *
On creation, the enumerator must already be positioned - * to the first term.
* *Term enumerations are always ordered by - * Term.compareTo(). Each term in the enumeration is + * {@link #getTermComparator}. Each term in the enumeration is * greater than all that precede it.
*/ public abstract class FilteredTermsEnum extends TermsEnum { + private TermRef initialSeekTerm = null; + private boolean doSeek = true; + private TermsEnum tenum = null; + private String field; + protected static enum AcceptStatus {YES, NO, END}; + + /** Return if term is accepted, not accepted or the iteration ended. */ + protected abstract AcceptStatus accept(TermRef term) throws IOException; - /** the delegate enum - to set this member use {@link #setEnum} */ - protected TermsEnum actualEnum; - - /** Return true if term is accepted */ - protected abstract AcceptStatus accept(TermRef term); - - /** Equality measure on the term */ - public abstract float difference(); + /** Equality measure on the term, used by {@link FuzzyQuery} and + * scoring {@link MultiTermQuery}. */ + public abstract float difference() throws IOException; - public abstract String field(); + /** + * Creates a filtered {@link TermsEnum} for the given field name and reader. + */ + public FilteredTermsEnum(final IndexReader reader, final String field) throws IOException { + this.field = field; + final Terms terms = reader.fields().terms(field); + tenum = (terms != null) ? terms.iterator() : null; + } - /** Only called once, right after construction, to check - * whether there are no matching terms */ - public abstract boolean empty(); - /** - * use this method to set the actual TermsEnum (e.g. in ctor), - * it will be automatically positioned on the first - * accepted term, and returns the term found or null if - * there is no matching term. + * Use this method to set the initial {@link TermRef} + * to seek before iterating. This is a convenience method for + * subclasses that do not override {@link #nextSeekTerm}. + * If the initial seek term is {@code null} (default), + * the enum is empty. + *You can only use this method, if you keep the default + * implementation of {@link #nextSeekTerm}. */ - protected TermRef setEnum(TermsEnum actualEnum, TermRef term) throws IOException { - this.actualEnum = actualEnum; + protected final void setInitialSeekTerm(TermRef term) throws IOException { + this.initialSeekTerm = term; + } + + /** On the first call to {@link #next} or if {@link #accept} returns + * {@link AcceptStatus#END}, this method will be + * called to eventually seek the underlying TermsEnum to a new position. + * This method returns per default only one time the initial seek term + * and then {@code null}, so no repositioning is done. + *
Override this method, if you want a more sophisticated TermsEnum, + * that repositions the iterator during enumeration. + * If this method always returns {@code null} the enum is empty. + */ + protected TermRef nextSeekTerm() throws IOException { + final TermRef t = initialSeekTerm; + initialSeekTerm = null; + return t; + } - // Find the first term that matches - if (term != null) { - SeekStatus status = actualEnum.seek(term); - if (status == SeekStatus.END) { - return null; - } else { - AcceptStatus s = accept(actualEnum.term()); - if (s == AcceptStatus.NO) { - return next(); - } else if (s == AcceptStatus.END) { - return null; - } else { - return actualEnum.term(); - } - } - } else { - return next(); - } + /** returns the field this TermsEnum is working on */ + public final String field() { + return field; } - + @Override public TermRef term() throws IOException { - if(actualEnum == null) { - return null; - } - return actualEnum.term(); + return (tenum == null) ? null : tenum.term(); } @Override - /** Don't call this until after setEnum, else you'll hit NPE */ public TermRef.Comparator getTermComparator() throws IOException { - return actualEnum.getTermComparator(); + return (tenum == null) ? null : tenum.getTermComparator(); } - /** - * Returns the docFreq of the current Term in the enumeration. - * Returns -1 if no Term matches or all terms have been enumerated. - */ @Override public int docFreq() { - assert actualEnum != null; - return actualEnum.docFreq(); + return (tenum == null) ? -1 : tenum.docFreq(); } - /** Increments the enumeration to the next element. - * Non-null if one exists, or null if it's the end. */ @Override public TermRef next() throws IOException { - assert actualEnum != null; - while (true) { - TermRef term = actualEnum.next(); + if (tenum == null) + return null; + for (;;) { + // Seek or forward the iterator + final TermRef term; + if (doSeek) { + doSeek = false; + if ((term = seekNextPosition()) == null) + return null; + } else { + term = tenum.next(); + } + + // check if term is accepted if (term != null) { - AcceptStatus s = accept(term); + final AcceptStatus s = accept(term); if (s == AcceptStatus.YES) { return term; - } else if (s == AcceptStatus.END) { - // end - return null; - } - } else { - // end - return null; + } else if (s == AcceptStatus.NO) { + continue; + } // fall through on END } + + doSeek = true; } } + + private TermRef seekNextPosition() throws IOException { + for (;;) { + final TermRef t = nextSeekTerm(); + if (t == null) return null; + final SeekStatus status = tenum.seek(t); + if (status == SeekStatus.END) + continue; + return tenum.term(); + } + } + // nocommit: In my opinion, if a FilteredTermsEnum supports + // nextSeekTerm as non-singleton-iterator, seeking does not work. + // For MTQ, seeking is not needed at all, so this two methods should + // throw UOE per default and finishSeek() removed. + + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(TermRef term) throws IOException { - return finishSeek(actualEnum.seek(term)); + /*if (tenum == null) + return SeekStatus.END; + return finishSeek(tenum.seek(term)); + */ + throw new UnsupportedOperationException("FilteredTermsEnum does not support seeking"); } + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(long ord) throws IOException { - return finishSeek(actualEnum.seek(ord)); + /*if (tenum == null) + return SeekStatus.END; + return finishSeek(tenum.seek(ord));*/ + throw new UnsupportedOperationException("FilteredTermsEnum does not support seeking"); } - private SeekStatus finishSeek(SeekStatus status) throws IOException { + /*private SeekStatus finishSeek(SeekStatus status) throws IOException { if (status != SeekStatus.END) { - TermRef term = actualEnum.term(); + TermRef term = tenum.term(); final AcceptStatus s = accept(term); if (s == AcceptStatus.NO) { term = next(); @@ -156,15 +192,15 @@ } else { return status; } - } + }*/ @Override public long ord() throws IOException { - return actualEnum.ord(); + return (tenum == null) ? -1 : tenum.ord(); } @Override public DocsEnum docs(Bits bits) throws IOException { - return actualEnum.docs(bits); + return (tenum == null) ? null : tenum.docs(bits); } } Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 887236) +++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -19,7 +19,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; import java.io.IOException; @@ -27,8 +26,9 @@ /** Subclass of FilteredTermEnum for enumerating all terms that are similar * to the specified filter term. * - *
Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. + *
Term enumerations are always ordered by + * {@link #getTermComparator}. Each term in the enumeration is + * greater than all that precede it.
*/ public final class FuzzyTermsEnum extends FilteredTermsEnum { @@ -44,10 +44,8 @@ private int[][] d; private float similarity; - private final boolean empty; private Term searchTerm; - private final String field; private final String text; private final String prefix; @@ -102,7 +100,7 @@ * @throws IOException */ public FuzzyTermsEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { - super(); + super(reader, term.field()); if (minSimilarity >= 1.0f) throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); @@ -114,7 +112,6 @@ this.minimumSimilarity = minSimilarity; this.scale_factor = 1.0f / (1.0f - minimumSimilarity); this.searchTerm = term; - this.field = searchTerm.field(); //The prefix could be longer than the word. //It's kind of silly though. It means we must match the entire word. @@ -127,21 +124,11 @@ initializeMaxDistances(); this.d = initDistanceArray(); - Terms terms = reader.fields().terms(field); - if (terms != null) { - empty = setEnum(terms.iterator(), prefixTermRef) == null; - } else { - empty = false; - } + setInitialSeekTerm(prefixTermRef); } private final TermRef prefixTermRef; - @Override - public String field() { - return field; - } - /** * The termCompare method in FuzzyTermEnum uses Levenshtein distance to * calculate the distance between the given term and the comparing term. @@ -163,11 +150,6 @@ return (float)((similarity - minimumSimilarity) * scale_factor); } - @Override - public final boolean empty() { - return empty; - } - /****************************** * Compute Levenshtein distance ******************************/ Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 887236) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -103,22 +103,17 @@ // nocommit -- if no terms we'd want to return NullQuery BooleanQuery result = new BooleanQuery(true); - if (!termsEnum.empty()) { - final String field = termsEnum.field(); - assert field != null; - int count = 0; - TermRef term = termsEnum.term(); - // first term must exist since termsEnum wasn't null - assert term != null; - do { - TermQuery tq = new TermQuery(new Term(field, term.toString())); // found a match - tq.setBoost(query.getBoost() * termsEnum.difference()); // set the boost - result.add(tq, BooleanClause.Occur.SHOULD); // add to query - count++; - term = termsEnum.next(); - } while(term != null); - query.incTotalNumberOfTerms(count); + final String field = termsEnum.field(); + assert field != null; + int count = 0; + TermRef term; + while ((term = termsEnum.next()) != null) { + TermQuery tq = new TermQuery(new Term(field, term.toString())); // found a match + tq.setBoost(query.getBoost() * termsEnum.difference()); // set the boost + result.add(tq, BooleanClause.Occur.SHOULD); // add to query + count++; } + query.incTotalNumberOfTerms(count); return result; } else { // deprecated case @@ -168,8 +163,11 @@ @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { // strip the scores off - Query result = new ConstantScoreQuery(new QueryWrapperFilter(super.rewrite(reader, query))); - result.setBoost(query.getBoost()); + Query result = super.rewrite(reader, query); + if (result instanceof BooleanQuery && !((BooleanQuery) result).clauses().isEmpty()) { + result = new ConstantScoreQuery(new QueryWrapperFilter(result)); + result.setBoost(query.getBoost()); + } return result; } @@ -248,54 +246,53 @@ // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: - final CollectionA {@link Query} that matches numeric values within a
@@ -163,7 +161,7 @@
assert (valSize == 32 || valSize == 64);
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
- this.field = StringHelper.intern(field);
+ this.field = field;
this.precisionStep = precisionStep;
this.valSize = valSize;
this.min = min;
@@ -344,7 +342,7 @@
if (o instanceof NumericRangeQuery) {
final NumericRangeQuery q=(NumericRangeQuery)o;
return (
- field==q.field &&
+ field.equals(q.field) &&
(q.min == null ? min == null : q.min.equals(min)) &&
(q.max == null ? max == null : q.max.equals(max)) &&
minInclusive == q.minInclusive &&
@@ -365,15 +363,9 @@
(Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
(Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
}
-
- // field must be interned after reading from stream
- private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
- in.defaultReadObject();
- field = StringHelper.intern(field);
- }
// members (package private, to be also fast accessible by NumericRangeTermEnum)
- String field;
+ final String field;
final int precisionStep, valSize;
final T min, max;
final boolean minInclusive,maxInclusive;
@@ -390,15 +382,12 @@
*/
private final class NumericRangeTermsEnum extends FilteredTermsEnum {
- private final IndexReader reader;
private final LinkedList
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- *
+ * Term enumerations are always ordered by
+ * {@link #getTermComparator}. Each term in the enumeration is
+ * greater than all that precede it.
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
+ * Term enumerations are always ordered by
+ * {@link #getTermComparator}. Each term in the enumeration is
+ * greater than all that precede it.