Index: src/java/org/apache/lucene/search/EmptyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/EmptyTermsEnum.java (revision 0) +++ src/java/org/apache/lucene/search/EmptyTermsEnum.java (revision 0) @@ -0,0 +1,62 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.TermRef; +import org.apache.lucene.index.TermsEnum; + +/** + * Subclass of {@code FilteredTermsEnum} that is always empty. + *
+ * This can be used by {@link MultiTermQuery}s that can never match any term
+ * but still want to preserve MultiTermQuery semantics such as
+ * {@link MultiTermQuery#rewriteMethod}.
+ */
+public final class EmptyTermsEnum extends FilteredTermsEnum {
+
+  /**
+   * Creates a new EmptyTermsEnum for the given field name.
+   */
+  public EmptyTermsEnum(final String field) {
+    super((TermsEnum) null, field); // null delegate: there is nothing to enumerate
+  }
+
+  /** Always returns {@link AcceptStatus#END}. */
+  @Override
+  protected AcceptStatus accept(TermRef term) {
+    return AcceptStatus.END;
+  }
+
+  /** Always returns {@code 1.0F}. */
+  @Override
+  public float difference() {
+    return 1.0F;
+  }
+
+  /** Always returns {@link SeekStatus#END}. */
+  @Override
+  public SeekStatus seek(TermRef term) {
+    return SeekStatus.END;
+  }
+
+  /** Always returns {@link SeekStatus#END}. */
+  @Override
+  public SeekStatus seek(long ord) {
+    return SeekStatus.END;
+  }
+}
Property changes on: src\java\org\apache\lucene\search\EmptyTermsEnum.java
___________________________________________________________________
Added: svn:keywords
+ Date Author Id Revision HeadURL
Added: svn:eol-style
+ native
Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java
===================================================================
--- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 887509)
+++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy)
@@ -18,153 +18,199 @@
*/
import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermRef;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
/**
* Abstract class for enumerating a subset of all terms.
- *
- *
On creation, the enumerator must already be positioned - * to the first term.
* - *Term enumerations are always ordered by - * Term.compareTo(). Each term in the enumeration is + *
Term enumerations should be always ordered by + * {@link #getTermComparator}. Each term in the enumeration is * greater than all that precede it.
-*/ + *This enum cannot guarantee this, if you use seeking mode + * (override {@link #nextSeekTerm}) and provide + * terms to seek out of order and not greater + * than the current term. All {@code FilteredTermsEnum} + * in Lucene Core support this.
+ */ public abstract class FilteredTermsEnum extends TermsEnum { + private final String field; + + private TermRef initialSeekTerm = null; + private boolean doSeek = true; + + protected final TermsEnum tenum; + + /** Return value, if term is accepted, not accepted or the iteration should + * end (and possibly seek). + * @see #accept(TermRef) + */ protected static enum AcceptStatus {YES, NO, END}; + + /** Return if term is accepted, not accepted or the iteration should ended + * (and possibly seek). + */ + protected abstract AcceptStatus accept(TermRef term) throws IOException; - /** the delegate enum - to set this member use {@link #setEnum} */ - protected TermsEnum actualEnum; - - /** Return true if term is accepted */ - protected abstract AcceptStatus accept(TermRef term); - - /** Equality measure on the term */ - public abstract float difference(); + /** Equality measure on the term, used by {@link FuzzyQuery} and + * scoring {@link MultiTermQuery}. */ + public abstract float difference() throws IOException; - public abstract String field(); + /** + * Creates a filtered {@link TermsEnum} for the given field name and reader. + */ + public FilteredTermsEnum(final IndexReader reader, final String field) throws IOException { + this.field = field; + final Terms terms = reader.fields().terms(field); + tenum = (terms != null) ? terms.iterator() : null; + } - /** Only called once, right after construction, to check - * whether there are no matching terms */ - public abstract boolean empty(); + /** + * Creates a filtered {@link TermsEnum} on a terms enum for the given field name. + * @param tenum the terms enumeration to filter, if {@code null} this is the null iterator. + * @param field the field name this enum operates on (needed by {@link MultiTermQuery}). + */ + public FilteredTermsEnum(final TermsEnum tenum, final String field) { + this.tenum = tenum; + this.field = field; + } /** - * use this method to set the actual TermsEnum (e.g. 
in ctor), - * it will be automatically positioned on the first - * accepted term, and returns the term found or null if - * there is no matching term. + * Use this method to set the initial {@link TermRef} + * to seek before iterating. This is a convenience method for + * subclasses that do not override {@link #nextSeekTerm}. + * If the initial seek term is {@code null} (default), + * the enum is empty. + *You can only use this method, if you keep the default + * implementation of {@link #nextSeekTerm}. */ - protected TermRef setEnum(TermsEnum actualEnum, TermRef term) throws IOException { - this.actualEnum = actualEnum; + protected final void setInitialSeekTerm(TermRef term) throws IOException { + this.initialSeekTerm = term; + } + + /** On the first call to {@link #next} or if {@link #accept} returns + * {@link AcceptStatus#END}, this method will be + * called to eventually seek the underlying TermsEnum to a new position. + * This method returns per default only one time the initial seek term + * and then {@code null}, so no repositioning is done. + *
Override this method, if you want a more sophisticated TermsEnum, + * that repositions the iterator during enumeration. + * If this method always returns {@code null} the enum is empty. + */ + protected TermRef nextSeekTerm() throws IOException { + final TermRef t = initialSeekTerm; + initialSeekTerm = null; + return t; + } - // Find the first term that matches - if (term != null) { - SeekStatus status = actualEnum.seek(term); - if (status == SeekStatus.END) { - return null; - } else { - AcceptStatus s = accept(actualEnum.term()); - if (s == AcceptStatus.NO) { - return next(); - } else if (s == AcceptStatus.END) { - return null; - } else { - return actualEnum.term(); - } - } - } else { - return next(); - } + /** + * Returns the related attributes, the returned {@link AttributeSource} + * is shared with the delegate {@code TermsEnum}. + */ + @Override + public AttributeSource attributes() { + /* if we have no tenum, we return a new attributes instance, + * to prevent NPE in subclasses that use attributes. + * in all other cases we share the attributes with our delegate. */ + return (tenum == null) ? super.attributes() : tenum.attributes(); } + /** returns the field this TermsEnum is working on */ + public final String field() { + return field; + } + @Override public TermRef term() throws IOException { - if(actualEnum == null) { - return null; - } - return actualEnum.term(); + return (tenum == null) ? null : tenum.term(); } @Override - /** Don't call this until after setEnum, else you'll hit NPE */ public TermRef.Comparator getTermComparator() throws IOException { - return actualEnum.getTermComparator(); + return (tenum == null) ? null : tenum.getTermComparator(); } - /** - * Returns the docFreq of the current Term in the enumeration. - * Returns -1 if no Term matches or all terms have been enumerated. - */ @Override public int docFreq() { - assert actualEnum != null; - return actualEnum.docFreq(); + return (tenum == null) ? 
-1 : tenum.docFreq(); } - - /** Increments the enumeration to the next element. - * Non-null if one exists, or null if it's the end. */ - @Override - public TermRef next() throws IOException { - assert actualEnum != null; - while (true) { - TermRef term = actualEnum.next(); - if (term != null) { - AcceptStatus s = accept(term); - if (s == AcceptStatus.YES) { - return term; - } else if (s == AcceptStatus.END) { - // end - return null; - } - } else { - // end - return null; - } - } - } + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(TermRef term) throws IOException { - return finishSeek(actualEnum.seek(term)); + throw new UnsupportedOperationException("FilteredTermsEnum does not support seeking"); } + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(long ord) throws IOException { - return finishSeek(actualEnum.seek(ord)); + throw new UnsupportedOperationException("FilteredTermsEnum does not support seeking"); } - private SeekStatus finishSeek(SeekStatus status) throws IOException { - if (status != SeekStatus.END) { - TermRef term = actualEnum.term(); - final AcceptStatus s = accept(term); - if (s == AcceptStatus.NO) { - term = next(); + @Override + public long ord() throws IOException { + return (tenum == null) ? -1 : tenum.ord(); + } + + @Override + public DocsEnum docs(Bits bits) throws IOException { + return (tenum == null) ? 
null : tenum.docs(bits); + } + + @Override + public TermRef next() throws IOException { + if (tenum == null) + return null; + for (;;) { + // Seek or forward the iterator + final TermRef term; + if (doSeek) { + doSeek = false; + term = seekNextPosition(); if (term == null) { - return SeekStatus.END; - } else { - return SeekStatus.NOT_FOUND; + // we reached the end as no more seek operation + return null; } - } else if (s == AcceptStatus.END) { - return SeekStatus.END; } else { - return status; + term = tenum.next(); } - } else { - return status; + + // check if term is accepted + if (term != null) { + switch (accept(term)) { + case YES: + return term; + case END: + // actual iteration ended, we must seek now + doSeek = true; + break; + } + } else { + // actual iteration ended, we must seek now + doSeek = true; + } } } - - @Override - public long ord() throws IOException { - return actualEnum.ord(); + + private TermRef seekNextPosition() throws IOException { + for (;;) { + final TermRef t = nextSeekTerm(); + if (t == null) + return null; + final SeekStatus status = tenum.seek(t); + if (status == SeekStatus.END) + continue; + return tenum.term(); + } } - - @Override - public DocsEnum docs(Bits bits) throws IOException { - return actualEnum.docs(bits); - } } Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 887509) +++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -19,7 +19,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; import java.io.IOException; @@ -27,8 +26,9 @@ /** Subclass of FilteredTermEnum for enumerating all terms that are similar * to the specified filter term. * - *
Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. + *
Term enumerations are always ordered by + * {@link #getTermComparator}. Each term in the enumeration is + * greater than all that precede it.
*/ public final class FuzzyTermsEnum extends FilteredTermsEnum { @@ -44,10 +44,8 @@ private int[][] d; private float similarity; - private final boolean empty; private Term searchTerm; - private final String field; private final String text; private final String prefix; @@ -102,7 +100,7 @@ * @throws IOException */ public FuzzyTermsEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { - super(); + super(reader, term.field()); if (minSimilarity >= 1.0f) throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); @@ -114,7 +112,6 @@ this.minimumSimilarity = minSimilarity; this.scale_factor = 1.0f / (1.0f - minimumSimilarity); this.searchTerm = term; - this.field = searchTerm.field(); //The prefix could be longer than the word. //It's kind of silly though. It means we must match the entire word. @@ -127,21 +124,11 @@ initializeMaxDistances(); this.d = initDistanceArray(); - Terms terms = reader.fields().terms(field); - if (terms != null) { - empty = setEnum(terms.iterator(), prefixTermRef) == null; - } else { - empty = false; - } + setInitialSeekTerm(prefixTermRef); } private final TermRef prefixTermRef; - @Override - public String field() { - return field; - } - /** * The termCompare method in FuzzyTermEnum uses Levenshtein distance to * calculate the distance between the given term and the comparing term. 
@@ -163,11 +150,6 @@ return (float)((similarity - minimumSimilarity) * scale_factor); } - @Override - public final boolean empty() { - return empty; - } - /****************************** * Compute Levenshtein distance ******************************/ Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 887509) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -103,22 +103,17 @@ // nocommit -- if no terms we'd want to return NullQuery BooleanQuery result = new BooleanQuery(true); - if (!termsEnum.empty()) { - final String field = termsEnum.field(); - assert field != null; - int count = 0; - TermRef term = termsEnum.term(); - // first term must exist since termsEnum wasn't null - assert term != null; - do { - TermQuery tq = new TermQuery(new Term(field, term.toString())); // found a match - tq.setBoost(query.getBoost() * termsEnum.difference()); // set the boost - result.add(tq, BooleanClause.Occur.SHOULD); // add to query - count++; - term = termsEnum.next(); - } while(term != null); - query.incTotalNumberOfTerms(count); + final String field = termsEnum.field(); + assert field != null; + int count = 0; + TermRef term; + while ((term = termsEnum.next()) != null) { + TermQuery tq = new TermQuery(new Term(field, term.toString())); // found a match + tq.setBoost(query.getBoost() * termsEnum.difference()); // set the boost + result.add(tq, BooleanClause.Occur.SHOULD); // add to query + count++; } + query.incTotalNumberOfTerms(count); return result; } else { // deprecated case @@ -167,9 +162,14 @@ private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - // strip the scores off - Query result = new ConstantScoreQuery(new 
QueryWrapperFilter(super.rewrite(reader, query))); - result.setBoost(query.getBoost()); + Query result = super.rewrite(reader, query); + assert result instanceof BooleanQuery; + // nocommit: if empty boolean query return NullQuery + if (!((BooleanQuery) result).clauses().isEmpty()) { + // strip the scores off + result = new ConstantScoreQuery(new QueryWrapperFilter(result)); + result.setBoost(query.getBoost()); + } return result; } @@ -248,54 +248,53 @@ // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: - final CollectionA {@link Query} that matches numeric values within a
@@ -163,7 +161,7 @@
assert (valSize == 32 || valSize == 64);
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
- this.field = StringHelper.intern(field);
+ this.field = field;
this.precisionStep = precisionStep;
this.valSize = valSize;
this.min = min;
@@ -303,9 +301,12 @@
return new NumericRangeQuery
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
- *
+ * Term enumerations are always ordered by
+ * {@link #getTermComparator}. Each term in the enumeration is
+ * greater than all that precede it.
- * Term enumerations are always ordered by Term.compareTo(). Each term in
- * the enumeration is greater than all that precede it.
+ * Term enumerations are always ordered by
+ * {@link #getTermComparator}. Each term in the enumeration is
+ * greater than all that precede it.SingleTermsEnum.
@@ -43,38 +41,18 @@
* if it exists.
*/
public SingleTermsEnum(IndexReader reader, Term singleTerm) throws IOException {
- this.singleTerm = singleTerm;
- Terms terms = reader.fields().terms(singleTerm.field());
- if (terms != null) {
- singleRef = new TermRef(singleTerm.text());
- empty = setEnum(terms.iterator(), singleRef) == null;
- } else {
- empty = true;
- singleRef = null;
- }
+ super(reader, singleTerm.field());
+ singleRef = new TermRef(singleTerm.text());
+ setInitialSeekTerm(singleRef);
}
@Override
protected AcceptStatus accept(TermRef term) {
- if (term.equals(singleRef)) {
- return AcceptStatus.YES;
- } else {
- return AcceptStatus.END;
- }
+ return term.equals(singleRef) ? AcceptStatus.YES : AcceptStatus.END;
}
@Override
public float difference() {
return 1.0F;
}
-
- @Override
- public boolean empty() {
- return empty;
- }
-
- @Override
- public String field() {
- return singleTerm.field();
- }
}
Index: src/java/org/apache/lucene/search/TermRangeQuery.java
===================================================================
--- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 887509)
+++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy)
@@ -142,10 +142,10 @@
@Override
protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException {
- return new TermRangeTermsEnum(reader, field,
- lowerTerm, upperTerm,
- includeLower, includeUpper,
- collator);
+ return (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) ?
+ new EmptyTermsEnum(field) :
+ new TermRangeTermsEnum(reader, field,
+ lowerTerm, upperTerm, includeLower, includeUpper, collator);
}
/** Prints a user-readable version of this query. */
Index: src/java/org/apache/lucene/search/TermRangeTermsEnum.java
===================================================================
--- src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 887509)
+++ src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy)
@@ -22,28 +22,24 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermRef;
-import org.apache.lucene.index.Terms;
-//import org.apache.lucene.index.Term;
import org.apache.lucene.util.StringHelper;
/**
* Subclass of FilteredTermEnum for enumerating all terms that match the
* specified range parameters.
- *