Index: contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java =================================================================== --- contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (revision 887712) +++ contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (working copy) @@ -150,11 +150,16 @@ mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); query = mtq; } - FakeReader fReader = new FakeReader(); - MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fReader, mtq); - if (fReader.field != null) { - IndexReader ir = getReaderForField(fReader.field); + if (mtq.getField() != null) { + IndexReader ir = getReaderForField(mtq.getField()); extract(query.rewrite(ir), terms); + } else { + FakeReader fReader = new FakeReader(); + MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.rewrite(fReader, mtq); + if (fReader.field != null) { + IndexReader ir = getReaderForField(fReader.field); + extract(query.rewrite(ir), terms); + } } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; Index: contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java =================================================================== --- contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (revision 887712) +++ contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java (working copy) @@ -204,21 +204,21 @@ int df = reader.docFreq(startTerm); int numVariants=0; int totalVariantDocFreqs=0; - if (!fe.empty()) { - do { - TermRef possibleMatch = fe.term(); + TermRef possibleMatch; + MultiTermQuery.TermDifferenceAttribute diffAtt = + fe.attributes().addAttribute(MultiTermQuery.TermDifferenceAttribute.class); + while ((possibleMatch = fe.next()) != null) { if (possibleMatch!=null) { numVariants++; totalVariantDocFreqs+=fe.docFreq(); - float score=fe.difference(); + 
float score=diffAtt.getDifference(); if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){ ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), possibleMatch.toString()),score,startTerm); variantsQ.insertWithOverflow(st); minScore = variantsQ.top().score; // maintain minScore } } - } while(fe.next() != null); - } + } if(numVariants>0) { Index: contrib/regex/src/java/org/apache/lucene/search/regex/RegexQuery.java =================================================================== --- contrib/regex/src/java/org/apache/lucene/search/regex/RegexQuery.java (revision 887712) +++ contrib/regex/src/java/org/apache/lucene/search/regex/RegexQuery.java (working copy) @@ -38,6 +38,7 @@ /** Constructs a query for terms matching term. */ public RegexQuery(Term term) { + super(term.field()); this.term = term; } Index: contrib/regex/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java =================================================================== --- contrib/regex/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (revision 887712) +++ contrib/regex/src/java/org/apache/lucene/search/regex/RegexTermsEnum.java (working copy) @@ -37,15 +37,12 @@ */ public class RegexTermsEnum extends FilteredTermsEnum { - private String field = ""; private String pre = ""; - private final boolean empty; private RegexCapabilities regexImpl; private final TermRef prefixRef; public RegexTermsEnum(IndexReader reader, Term term, RegexCapabilities regexImpl) throws IOException { - super(); - field = term.field(); + super(reader, term.field()); String text = term.text(); this.regexImpl = regexImpl; @@ -54,19 +51,11 @@ pre = regexImpl.prefix(); if (pre == null) pre = ""; - Terms terms = reader.fields().terms(term.field()); prefixRef = new TermRef(pre); - if (terms != null) { - empty = setEnum(terms.iterator(), prefixRef) == null; - } else { - empty = true; - } + setInitialSeekTerm(prefixRef); } - public String field() { - return field; - } - + @Override protected final 
AcceptStatus accept(TermRef term) { if (term.startsWith(prefixRef)) { return regexImpl.match(term.toString()) ? AcceptStatus.YES : AcceptStatus.NO; @@ -74,13 +63,4 @@ return AcceptStatus.END; } } - - public final float difference() { -// TODO: adjust difference based on distance of searchTerm.text() and term().text() - return 1.0f; - } - - public final boolean empty() { - return empty; - } } Index: src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -161,8 +161,8 @@ /** Prints a user-readable version of this query. */ @Override public String toString(String field) { - return "ConstantScore(" + filter.toString() - + (getBoost()==1.0 ? ")" : "^" + getBoost()); + return "ConstantScore(" + filter.toString() + ")" + + (getBoost()==1.0 ? "" : "^" + getBoost()); } /** Returns true if o is equal to this. */ Index: src/java/org/apache/lucene/search/EmptyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/EmptyTermsEnum.java (revision 0) +++ src/java/org/apache/lucene/search/EmptyTermsEnum.java (revision 0) @@ -0,0 +1,57 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.TermRef; +import org.apache.lucene.index.TermsEnum; + +/** + * Subclass of {@code FilteredTermsEnum} that is always empty. + *

+ * This can be used by {@link MultiTermQuery}s that can never match any term + * but want to preserve MultiTermQuery semantics such as + * {@link MultiTermQuery#rewriteMethod}. + */ +public final class EmptyTermsEnum extends FilteredTermsEnum { + + /** + * Creates a new EmptyTermsEnum. + */ + public EmptyTermsEnum() { + super((TermsEnum) null); + } + + @Override + /** Always returns {@link AcceptStatus#END}. */ + protected AcceptStatus accept(TermRef term) { + return AcceptStatus.END; + } + + /** Always returns {@link SeekStatus#END}. */ + @Override + public SeekStatus seek(TermRef term) { + return SeekStatus.END; + } + + /** Always returns {@link SeekStatus#END}. */ + @Override + public SeekStatus seek(long ord) { + return SeekStatus.END; + } + +} Property changes on: src\java\org\apache\lucene\search\EmptyTermsEnum.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: src/java/org/apache/lucene/search/FilteredTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/FilteredTermEnum.java (working copy) @@ -28,6 +28,7 @@ @deprecated Switch to {@link FilteredTermsEnum} instead. 
*/ +@Deprecated public abstract class FilteredTermEnum extends TermEnum { /** the current term */ protected Term currentTerm = null; Index: src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -18,153 +18,186 @@ */ import java.io.IOException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; /** * Abstract class for enumerating a subset of all terms. - * - *

On creation, the enumerator must already be positioned - * to the first term.

* *

Term enumerations are always ordered by - * Term.compareTo(). Each term in the enumeration is + * {@link #getTermComparator}. Each term in the enumeration is * greater than all that precede it.

-*/ + *

Please note: Consumers of this enum cannot + * call {@code seek()}, it is forward only; it throws + * {@link UnsupportedOperationException} when a seeking method + * is called. + */ public abstract class FilteredTermsEnum extends TermsEnum { - protected static enum AcceptStatus {YES, NO, END}; - - /** the delegate enum - to set this member use {@link #setEnum} */ - protected TermsEnum actualEnum; - - /** Return true if term is accepted */ - protected abstract AcceptStatus accept(TermRef term); - - /** Equality measure on the term */ - public abstract float difference(); + private TermRef initialSeekTerm = null; + private boolean doSeek = true; - public abstract String field(); + protected final TermsEnum tenum; - /** Only called once, right after construction, to check - * whether there are no matching terms */ - public abstract boolean empty(); + /** Return value, if term should be accepted or the iteration should + * {@code END}. The {@code *_SEEK} values denote that after handling the current term + * the enum should call {@link #nextSeekTerm()} and step forward. + * @see #accept(TermRef) + */ + protected static enum AcceptStatus {YES, YES_AND_SEEK, NO, NO_AND_SEEK, END}; + + /** Return if term is accepted, not accepted or the iteration should end + * (and possibly seek). + */ + protected abstract AcceptStatus accept(TermRef term) throws IOException; /** - * use this method to set the actual TermsEnum (e.g. in ctor), - * it will be automatically positioned on the first - * accepted term, and returns the term found or null if - * there is no matching term. + * Creates a filtered {@link TermsEnum} for the given field name and reader. */ - protected TermRef setEnum(TermsEnum actualEnum, TermRef term) throws IOException { - this.actualEnum = actualEnum; + public FilteredTermsEnum(final IndexReader reader, final String field) throws IOException { + final Terms terms = reader.fields().terms(field); + tenum = (terms != null) ? 
terms.iterator() : null; + } - // Find the first term that matches - if (term != null) { - SeekStatus status = actualEnum.seek(term); - if (status == SeekStatus.END) { - return null; - } else { - AcceptStatus s = accept(actualEnum.term()); - if (s == AcceptStatus.NO) { - return next(); - } else if (s == AcceptStatus.END) { - return null; - } else { - return actualEnum.term(); - } - } - } else { - return next(); - } + /** + * Creates a filtered {@link TermsEnum} on a terms enum for the given field name. + * @param tenum the terms enumeration to filter, if {@code null} this is the null iterator. + * @param field the field name this enum operates on (needed by {@link MultiTermQuery}). + */ + public FilteredTermsEnum(final TermsEnum tenum) { + this.tenum = tenum; } + /** + * Use this method to set the initial {@link TermRef} + * to seek before iterating. This is a convenience method for + * subclasses that do not override {@link #nextSeekTerm}. + * If the initial seek term is {@code null} (default), + * the enum is empty. + *

You can only use this method if you keep the default + * implementation of {@link #nextSeekTerm}. + */ + protected final void setInitialSeekTerm(TermRef term) throws IOException { + this.initialSeekTerm = term; + } + + /** On the first call to {@link #next} or if {@link #accept} returns + * {@link AcceptStatus#YES_AND_SEEK} or {@link AcceptStatus#NO_AND_SEEK}, + * this method will be called to possibly seek the underlying TermsEnum + * to a new position. + * By default, this method returns the initial seek term only once + * and then {@code null}, so no repositioning is ever done. + *

Override this method, if you want a more sophisticated TermsEnum, + * that repositions the iterator during enumeration. + * If this method always returns {@code null} the enum is empty. + *

Please note: This method should always provide a greater term + * than the last enumerated term, else the behaviour of this enum + * violates the contract for TermsEnums. + */ + protected TermRef nextSeekTerm() throws IOException { + final TermRef t = initialSeekTerm; + initialSeekTerm = null; + return t; + } + + /** + * Returns the related attributes, the returned {@link AttributeSource} + * is shared with the delegate {@code TermsEnum}. + */ @Override + public AttributeSource attributes() { + /* if we have no tenum, we return a new attributes instance, + * to prevent NPE in subclasses that use attributes. + * in all other cases we share the attributes with our delegate. */ + return (tenum == null) ? super.attributes() : tenum.attributes(); + } + + @Override public TermRef term() throws IOException { - if(actualEnum == null) { - return null; - } - return actualEnum.term(); + return (tenum == null) ? null : tenum.term(); } @Override - /** Don't call this until after setEnum, else you'll hit NPE */ public TermRef.Comparator getTermComparator() throws IOException { - return actualEnum.getTermComparator(); + return (tenum == null) ? null : tenum.getTermComparator(); } - /** - * Returns the docFreq of the current Term in the enumeration. - * Returns -1 if no Term matches or all terms have been enumerated. - */ @Override public int docFreq() { - assert actualEnum != null; - return actualEnum.docFreq(); + return (tenum == null) ? -1 : tenum.docFreq(); } - - /** Increments the enumeration to the next element. - * Non-null if one exists, or null if it's the end. */ - @Override - public TermRef next() throws IOException { - assert actualEnum != null; - while (true) { - TermRef term = actualEnum.next(); - if (term != null) { - AcceptStatus s = accept(term); - if (s == AcceptStatus.YES) { - return term; - } else if (s == AcceptStatus.END) { - // end - return null; - } - } else { - // end - return null; - } - } - } + /** This enum does not support seeking! 
+ * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(TermRef term) throws IOException { - return finishSeek(actualEnum.seek(term)); + throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override public SeekStatus seek(long ord) throws IOException { - return finishSeek(actualEnum.seek(ord)); + throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } - private SeekStatus finishSeek(SeekStatus status) throws IOException { - if (status != SeekStatus.END) { - TermRef term = actualEnum.term(); - final AcceptStatus s = accept(term); - if (s == AcceptStatus.NO) { - term = next(); - if (term == null) { - return SeekStatus.END; - } else { - return SeekStatus.NOT_FOUND; - } - } else if (s == AcceptStatus.END) { - return SeekStatus.END; - } else { - return status; - } - } else { - return status; - } - } - @Override public long ord() throws IOException { - return actualEnum.ord(); + return (tenum == null) ? -1 : tenum.ord(); } @Override public DocsEnum docs(Bits bits) throws IOException { - return actualEnum.docs(bits); + return (tenum == null) ? 
null : tenum.docs(bits); } + + @Override + public TermRef next() throws IOException { + if (tenum == null) + return null; + for (;;) { + // Seek or forward the iterator + final TermRef term; + if (doSeek) { + doSeek = false; + final TermRef t = nextSeekTerm(); + if (t == null || tenum.seek(t) == SeekStatus.END) { + // no more terms to seek to or enum exhausted + return null; + } + term = tenum.term(); + } else { + term = tenum.next(); + if (term == null) { + // enum exhausted + return null; + } + } + + // check if term is accepted + switch (accept(term)) { + case YES_AND_SEEK: + doSeek = true; + // term accepted, but we need to seek so fall-through + case YES: + // term accepted + return term; + case NO_AND_SEEK: + // invalid term, seek next time + doSeek = true; + break; + case END: + // we are supposed to end the enum + return null; + } + } + } + } Index: src/java/org/apache/lucene/search/FuzzyQuery.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/FuzzyQuery.java (working copy) @@ -19,9 +19,12 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermRef; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.io.Serializable; import java.util.PriorityQueue; /** Implements the fuzzy search query. 
The similarity measurement @@ -34,6 +37,63 @@ */ public class FuzzyQuery extends MultiTermQuery { + private static class FuzzyRewrite extends RewriteMethod implements Serializable { + @Override + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + int maxSize = BooleanQuery.getMaxClauseCount(); + PriorityQueue stQueue = new PriorityQueue(1024); + + TermsEnum termsEnum = query.getTermsEnum(reader); + assert termsEnum != null; + final String field = query.field; + if (field == null) + throw new NullPointerException("When you implement getTermsEnum(), you must specify a field."); + final MultiTermQuery.TermDifferenceAttribute diffAtt = + termsEnum.attributes().addAttribute(MultiTermQuery.TermDifferenceAttribute.class); + ScoreTerm bottomSt = null; + TermRef t; + final Term placeholderTerm = new Term(field); + while ((t = termsEnum.next()) != null) { + if (t == null) break; + ScoreTerm st = new ScoreTerm(placeholderTerm.createTerm(t.toString()), diffAtt.getDifference()); + if (stQueue.size() < maxSize) { + // record the current bottom item + if (bottomSt == null || st.compareTo(bottomSt) > 0) { + bottomSt = st; + } + // add to PQ, as it is not yet filled up + stQueue.offer(st); + } else { + assert bottomSt != null; + // only add to PQ, if the ScoreTerm is greater than the current bottom, + // as all entries will be enqueued after the current bottom and will never be visible + if (st.compareTo(bottomSt) < 0) { + stQueue.offer(st); + } + } + //System.out.println("current: "+st.term+"("+st.score+"), bottom: "+bottomSt.term+"("+bottomSt.score+")"); + } + + BooleanQuery bq = new BooleanQuery(true); + int size = Math.min(stQueue.size(), maxSize); + for(int i = 0; i < size; i++){ + ScoreTerm st = stQueue.poll(); + TermQuery tq = new TermQuery(st.term); // found a match + tq.setBoost(query.getBoost() * st.score); // set the boost + bq.add(tq, BooleanClause.Occur.SHOULD); // add to query + } + query.incTotalNumberOfTerms(bq.clauses().size()); + 
return bq; + } + + // Make sure we are still a singleton even after deserializing + protected Object readResolve() { + return FUZZY_REWRITE; + } + } + + private final static RewriteMethod FUZZY_REWRITE = new FuzzyRewrite(); + public final static float defaultMinSimilarity = 0.5f; public final static int defaultPrefixLength = 0; @@ -60,6 +120,7 @@ * or if prefixLength < 0 */ public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException { + super(term.field()); this.term = term; if (minimumSimilarity >= 1.0f) @@ -75,14 +136,14 @@ this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; - rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE; + rewriteMethod = FUZZY_REWRITE; } /** * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. */ public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { - this(term, minimumSimilarity, defaultPrefixLength); + this(term, minimumSimilarity, defaultPrefixLength); } /** @@ -109,14 +170,19 @@ return prefixLength; } - // @deprecated see #getTermsEnum - @Override + @Override @Deprecated protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + if (!termLongEnough) { // can only match if it's exact + return new SingleTermEnum(reader, term); + } return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); } @Override - protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (!termLongEnough) { // can only match if it's exact + return new SingleTermsEnum(reader, term); + } return new FuzzyTermsEnum(reader, getTerm(), minimumSimilarity, prefixLength); } @@ -132,55 +198,6 @@ throw new UnsupportedOperationException("FuzzyQuery cannot change rewrite method"); } - @Override - public Query rewrite(IndexReader reader) throws IOException { - if(!termLongEnough) { // can only match if it's exact 
- return new TermQuery(term); - } - - int maxSize = BooleanQuery.getMaxClauseCount(); - PriorityQueue stQueue = new PriorityQueue(1024); - //nocommit: use termsEnum - FilteredTermEnum enumerator = getEnum(reader); - try { - ScoreTerm bottomSt = null; - do { - final Term t = enumerator.term(); - if (t == null) break; - ScoreTerm st = new ScoreTerm(t, enumerator.difference()); - if (stQueue.size() < maxSize) { - // record the current bottom item - if (bottomSt == null || st.compareTo(bottomSt) > 0) { - bottomSt = st; - } - // add to PQ, as it is not yet filled up - stQueue.offer(st); - } else { - assert bottomSt != null; - // only add to PQ, if the ScoreTerm is greater than the current bottom, - // as all entries will be enqueued after the current bottom and will never be visible - if (st.compareTo(bottomSt) < 0) { - stQueue.offer(st); - } - } - //System.out.println("current: "+st.term+"("+st.score+"), bottom: "+bottomSt.term+"("+bottomSt.score+")"); - } while (enumerator.next()); - } finally { - enumerator.close(); - } - - BooleanQuery query = new BooleanQuery(true); - int size = Math.min(stQueue.size(), maxSize); - for(int i = 0; i < size; i++){ - ScoreTerm st = stQueue.poll(); - TermQuery tq = new TermQuery(st.term); // found a match - tq.setBoost(getBoost() * st.score); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - - return query; - } - protected static class ScoreTerm implements Comparable { public Term term; public float score; Index: src/java/org/apache/lucene/search/FuzzyTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/FuzzyTermEnum.java (working copy) @@ -30,6 +30,7 @@ * * @deprecated Please use {@link FuzzyTermsEnum} instead. 
*/ +@Deprecated public final class FuzzyTermEnum extends FilteredTermEnum { /* Allows us save time required to create a new array Index: src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -19,7 +19,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; import java.io.IOException; @@ -27,8 +26,9 @@ /** Subclass of FilteredTermEnum for enumerating all terms that are similar * to the specified filter term. * - *

Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. + *

Term enumerations are always ordered by + * {@link #getTermComparator}. Each term in the enumeration is + * greater than all that precede it.

*/ public final class FuzzyTermsEnum extends FilteredTermsEnum { @@ -43,17 +43,16 @@ */ private int[][] d; - private float similarity; - private final boolean empty; - private Term searchTerm; - private final String field; private final String text; private final String prefix; private final float minimumSimilarity; private final float scale_factor; private final int[] maxDistances = new int[TYPICAL_LONGEST_WORD_IN_INDEX]; + + private final MultiTermQuery.TermDifferenceAttribute diffAtt = + attributes().addAttribute(MultiTermQuery.TermDifferenceAttribute.class); // nocommit -- remove some of these ctors: /** @@ -102,7 +101,7 @@ * @throws IOException */ public FuzzyTermsEnum(IndexReader reader, Term term, final float minSimilarity, final int prefixLength) throws IOException { - super(); + super(reader, term.field()); if (minSimilarity >= 1.0f) throw new IllegalArgumentException("minimumSimilarity cannot be greater than or equal to 1"); @@ -114,7 +113,6 @@ this.minimumSimilarity = minSimilarity; this.scale_factor = 1.0f / (1.0f - minimumSimilarity); this.searchTerm = term; - this.field = searchTerm.field(); //The prefix could be longer than the word. //It's kind of silly though. It means we must match the entire word. @@ -127,21 +125,11 @@ initializeMaxDistances(); this.d = initDistanceArray(); - Terms terms = reader.fields().terms(field); - if (terms != null) { - empty = setEnum(terms.iterator(), prefixTermRef) == null; - } else { - empty = false; - } + setInitialSeekTerm(prefixTermRef); } private final TermRef prefixTermRef; - @Override - public String field() { - return field; - } - /** * The termCompare method in FuzzyTermEnum uses Levenshtein distance to * calculate the distance between the given term and the comparing term. 
@@ -151,23 +139,16 @@ if (term.startsWith(prefixTermRef)) { // TODO: costly that we create intermediate String: final String target = term.toString().substring(prefix.length()); - this.similarity = similarity(target); - return (similarity > minimumSimilarity) ? AcceptStatus.YES : AcceptStatus.NO; + final float similarity = similarity(target); + if (similarity > minimumSimilarity) { + diffAtt.setDifference((float)((similarity - minimumSimilarity) * scale_factor)); + return AcceptStatus.YES; + } else return AcceptStatus.NO; } else { return AcceptStatus.END; } } - @Override - public final float difference() { - return (float)((similarity - minimumSimilarity) * scale_factor); - } - - @Override - public final boolean empty() { - return empty; - } - /****************************** * Compute Levenshtein distance ******************************/ Index: src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -26,7 +26,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermRef; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; // for javadoc +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; /** * An abstract {@link Query} that matches documents @@ -58,9 +61,51 @@ * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. 
*/ public abstract class MultiTermQuery extends Query { + protected final String field; protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; transient int numberOfTerms = 0; + + public static interface TermDifferenceAttribute extends Attribute { + public void setDifference(float difference); + public float getDifference(); + } + public static class TermDifferenceAttributeImpl extends AttributeImpl implements TermDifferenceAttribute { + private float difference = 1.0f; + + public void setDifference(float difference) { + this.difference = difference; + } + + public float getDifference() { + return difference; + } + + @Override + public void clear() { + difference = 1.0f; + } + + @Override + public boolean equals(Object other) { + if (this == other) + return true; + if (other instanceof TermDifferenceAttributeImpl) + return ((TermDifferenceAttributeImpl) other).difference == difference; + return false; + } + + @Override + public int hashCode() { + return Float.floatToIntBits(difference); + } + + @Override + public void copyTo(AttributeImpl target) { + ((TermDifferenceAttribute) target).setDifference(difference); + } + } + /** Abstract class that defines how the query is rewritten. 
*/ public static abstract class RewriteMethod implements Serializable { public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; @@ -98,27 +143,26 @@ @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - FilteredTermsEnum termsEnum = query.getTermsEnum(reader); + TermsEnum termsEnum = query.getTermsEnum(reader); if (termsEnum != null) { - + final TermDifferenceAttribute diffAtt = + termsEnum.attributes().addAttribute(TermDifferenceAttribute.class); + // nocommit -- if no terms we'd want to return NullQuery BooleanQuery result = new BooleanQuery(true); - if (!termsEnum.empty()) { - final String field = termsEnum.field(); - assert field != null; - int count = 0; - TermRef term = termsEnum.term(); - // first term must exist since termsEnum wasn't null - assert term != null; - do { - TermQuery tq = new TermQuery(new Term(field, term.toString())); // found a match - tq.setBoost(query.getBoost() * termsEnum.difference()); // set the boost - result.add(tq, BooleanClause.Occur.SHOULD); // add to query - count++; - term = termsEnum.next(); - } while(term != null); - query.incTotalNumberOfTerms(count); + final String field = query.field; + if (field == null) + throw new NullPointerException("When you implement getTermsEnum(), you must specify a field."); + int count = 0; + TermRef term; + final Term placeholderTerm = new Term(field); + while ((term = termsEnum.next()) != null) { + TermQuery tq = new TermQuery(placeholderTerm.createTerm(term.toString())); // found a match + tq.setBoost(query.getBoost() * diffAtt.getDifference()); // set the boost + result.add(tq, BooleanClause.Occur.SHOULD); // add to query + count++; } + query.incTotalNumberOfTerms(count); return result; } else { // deprecated case @@ -167,9 +211,14 @@ private static class ConstantScoreBooleanQueryRewrite extends ScoringBooleanQueryRewrite implements Serializable { @Override public Query rewrite(IndexReader reader, MultiTermQuery 
query) throws IOException { - // strip the scores off - Query result = new ConstantScoreQuery(new QueryWrapperFilter(super.rewrite(reader, query))); - result.setBoost(query.getBoost()); + Query result = super.rewrite(reader, query); + assert result instanceof BooleanQuery; + // nocommit: if empty boolean query return NullQuery + if (!((BooleanQuery) result).clauses().isEmpty()) { + // strip the scores off + result = new ConstantScoreQuery(new QueryWrapperFilter(result)); + result.setBoost(query.getBoost()); + } return result; } @@ -248,54 +297,55 @@ // exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else, // ConstantFilterRewrite: - final Collection pendingTerms = new ArrayList(); - final Collection oldApiPendingTerms = new ArrayList(); final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc()); final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff); int docVisitCount = 0; - FilteredTermsEnum termsEnum = query.getTermsEnum(reader); + TermsEnum termsEnum = query.getTermsEnum(reader); if (termsEnum != null) { - if (!termsEnum.empty()) { - final String field = termsEnum.field(); - assert field != null; - TermRef term = termsEnum.term(); - // first term must exist since termsEnum wasn't null - assert term != null; - do { - pendingTerms.add((TermRef) term.clone()); - if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { - // Too many terms -- cut our losses now and make a filter. 
- Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); - result.setBoost(query.getBoost()); - return result; - } - // Loading the TermInfo from the terms dict here - // should not be costly, because 1) the - // query/filter will load the TermInfo when it - // runs, and 2) the terms dict has a cache: - docVisitCount += reader.docFreq(field, term); - term = termsEnum.next(); - } while(term != null); - - // Enumeration is done, and we hit a small - // enough number of terms & docs -- just make a - // BooleanQuery, now + final Collection pendingTerms = new ArrayList(); + final String field = query.field; + if (field == null) + throw new NullPointerException("When you implement getTermsEnum(), you must specify a field."); + TermRef term; + while ((term = termsEnum.next()) != null) { + pendingTerms.add((TermRef) term.clone()); + if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { + // Too many terms -- cut our losses now and make a filter. 
+ Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + result.setBoost(query.getBoost()); + return result; + } + // Loading the TermInfo from the terms dict here + // should not be costly, because 1) the + // query/filter will load the TermInfo when it + // runs, and 2) the terms dict has a cache: + docVisitCount += reader.docFreq(field, term); + } + + // Enumeration is done, and we hit a small + // enough number of terms & docs -- just make a + // BooleanQuery, now + + // nocommit: if pendingTerms.size()==0 return NullQuery + final Query result; + if (pendingTerms.isEmpty()) { + result = new BooleanQuery(true); + } else { BooleanQuery bq = new BooleanQuery(true); + final Term placeholderTerm = new Term(field); for(TermRef termRef : pendingTerms) { - TermQuery tq = new TermQuery(new Term(field, termRef.toString())); + TermQuery tq = new TermQuery(placeholderTerm.createTerm(termRef.toString())); bq.add(tq, BooleanClause.Occur.SHOULD); } // Strip scores - Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); result.setBoost(query.getBoost()); - query.incTotalNumberOfTerms(pendingTerms.size()); - return result; - } else { - // nocommit -- need NullQuery here - return new BooleanQuery(); } + query.incTotalNumberOfTerms(pendingTerms.size()); + return result; } else { + final Collection pendingTerms = new ArrayList(); // deprecated case FilteredTermEnum enumerator = query.getEnum(reader); @@ -303,7 +353,7 @@ while(true) { Term t = enumerator.term(); if (t != null) { - oldApiPendingTerms.add(t); + pendingTerms.add(t); // Loading the TermInfo from the terms dict here // should not be costly, because 1) the // query/filter will load the TermInfo when it @@ -313,21 +363,26 @@ if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. 
- Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter(query)); result.setBoost(query.getBoost()); return result; } else if (!enumerator.next()) { // Enumeration is done, and we hit a small // enough number of terms & docs -- just make a // BooleanQuery, now - BooleanQuery bq = new BooleanQuery(true); - for (final Term term: oldApiPendingTerms) { - TermQuery tq = new TermQuery(term); - bq.add(tq, BooleanClause.Occur.SHOULD); + final Query result; + if (pendingTerms.isEmpty()) { + result = new BooleanQuery(true); + } else { + BooleanQuery bq = new BooleanQuery(true); + for(Term term : pendingTerms) { + TermQuery tq = new TermQuery(term); + bq.add(tq, BooleanClause.Occur.SHOULD); + } + // Strip scores + result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result.setBoost(query.getBoost()); } - // Strip scores - Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); - result.setBoost(query.getBoost()); query.incTotalNumberOfTerms(pendingTerms.size()); return result; } @@ -399,14 +454,30 @@ * Constructs a query matching terms that cannot be represented with a single * Term. */ + public MultiTermQuery(final String field) { + this.field = field; + } + + /** + * Constructs a query matching terms that cannot be represented with a single + * Term. + * @deprecated Use {@link #MultiTermQuery(String)}, as the flex branch can + * only work on one field per terms enum. If you override + * {@link #getTermsEnum(IndexReader)}, you cannot use this ctor. + */ + @Deprecated public MultiTermQuery() { + this(null); } + /** Returns the field name for this query */ + public final String getField() { return field; } + /** Construct the enumeration to be used, expanding the * pattern term. 
* @deprecated Please override {@link #getTermsEnum} instead */ - protected FilteredTermEnum getEnum(IndexReader reader) - throws IOException { + @Deprecated + protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return null; } @@ -417,8 +488,7 @@ * term. * * nocommit in 3.x this will become abstract */ - protected FilteredTermsEnum getTermsEnum(IndexReader reader) - throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { return null; } @@ -479,8 +549,8 @@ final int prime = 31; int result = 1; result = prime * result + Float.floatToIntBits(getBoost()); - result = prime * result; - result += rewriteMethod.hashCode(); + result = prime * result + rewriteMethod.hashCode(); + if (field != null) result = prime * result + field.hashCode(); return result; } @@ -498,7 +568,7 @@ if (!rewriteMethod.equals(other.rewriteMethod)) { return false; } - return true; + return (other.field == null ? field == null : other.field.equals(field)); } } Index: src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 887712) +++ src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -74,6 +74,9 @@ public final int hashCode() { return query.hashCode(); } + + /** Returns the field name for this query */ + public final String getField() { return query.getField(); } /** * Expert: Return the number of unique terms visited during execution of the filter. 
@@ -104,16 +107,16 @@ */ @Override public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final FilteredTermsEnum termsEnum = query.getTermsEnum(reader); + final TermsEnum termsEnum = query.getTermsEnum(reader); if (termsEnum != null) { - if (!termsEnum.empty()) { + if (termsEnum.next() != null) { // fill into a OpenBitSet final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); final int[] docs = new int[32]; final int[] freqs = new int[32]; int termCount = 0; final Bits delDocs = reader.getDeletedDocs(); - while (true) { + do { termCount++; // System.out.println(" iter termCount=" + termCount + " term=" + // enumerator.term().toBytesString()); @@ -128,13 +131,7 @@ break; } } - TermRef term = termsEnum.next(); - if (term == null) { - break; - } - // System.out.println(" enum next term=" + term.toBytesString()); - assert term.termEquals(termsEnum.term()); - } + } while (termsEnum.next() != null); // System.out.println(" done termCount=" + termCount); query.incTotalNumberOfTerms(termCount); Index: src/java/org/apache/lucene/search/NumericRangeFilter.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeFilter.java (revision 887712) +++ src/java/org/apache/lucene/search/NumericRangeFilter.java (working copy) @@ -170,9 +170,6 @@ NumericRangeQuery.newFloatRange(field, min, max, minInclusive, maxInclusive) ); } - - /** Returns the field name for this filter */ - public String getField() { return query.getField(); } /** Returns true if the lower endpoint is inclusive */ public boolean includesMin() { return query.includesMin(); } Index: src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -28,7 +28,6 @@ import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.Term; import org.apache.lucene.index.TermRef; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; /** @@ -160,10 +159,10 @@ private NumericRangeQuery(final String field, final int precisionStep, final int valSize, T min, T max, final boolean minInclusive, final boolean maxInclusive ) { + super(field); assert (valSize == 32 || valSize == 64); if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); - this.field = StringHelper.intern(field); this.precisionStep = precisionStep; this.valSize = valSize; this.min = min; @@ -303,14 +302,14 @@ return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive); } - @Override - protected FilteredTermsEnum getTermsEnum(final IndexReader reader) throws IOException { - return new NumericRangeTermsEnum(reader); + @Override @SuppressWarnings("unchecked") + protected TermsEnum getTermsEnum(final IndexReader reader) throws IOException { + // very strange: java.lang.Number itself is not Comparable, but all subclasses used here are + return (min != null && max != null && ((Comparable) min).compareTo(max) > 0) ? + new EmptyTermsEnum() : + new NumericRangeTermsEnum(reader); } - /** Returns the field name for this query */ - public String getField() { return field; } - /** Returns true if the lower endpoint is inclusive */ public boolean includesMin() { return minInclusive; } @@ -326,7 +325,7 @@ @Override public String toString(final String field) { final StringBuilder sb = new StringBuilder(); - if (!this.field.equals(field)) sb.append(this.field).append(':'); + if (!getField().equals(field)) sb.append(getField()).append(':'); return sb.append(minInclusive ? '[' : '{') .append((min == null) ? "*" : min.toString()) .append(" TO ") @@ -344,7 +343,6 @@ if (o instanceof NumericRangeQuery) { final NumericRangeQuery q=(NumericRangeQuery)o; return ( - field==q.field && (q.min == null ?
min == null : q.min.equals(min)) && (q.max == null ? max == null : q.max.equals(max)) && minInclusive == q.minInclusive && @@ -358,22 +356,15 @@ @Override public final int hashCode() { int hash = super.hashCode(); - hash += field.hashCode()^0x4565fd66 + precisionStep^0x64365465; + hash += precisionStep^0x64365465; if (min != null) hash += min.hashCode()^0x14fa55fb; if (max != null) hash += max.hashCode()^0x733fa5fe; return hash + (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+ (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe); } - - // field must be interned after reading from stream - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - in.defaultReadObject(); - field = StringHelper.intern(field); - } // members (package private, to be also fast accessible by NumericRangeTermEnum) - String field; final int precisionStep, valSize; final T min, max; final boolean minInclusive,maxInclusive; @@ -390,15 +381,13 @@ */ private final class NumericRangeTermsEnum extends FilteredTermsEnum { - private final IndexReader reader; + private final TermRef currentLowerBound = new TermRef(), currentUpperBound = new TermRef(); + private final LinkedList rangeBounds = new LinkedList(); - private TermRef currentUpperBound = null; - private final boolean empty; private final TermRef.Comparator termComp; NumericRangeTermsEnum(final IndexReader reader) throws IOException { - this.reader = reader; - + super(reader, getField()); switch (valSize) { case 64: { // lower @@ -475,99 +464,32 @@ throw new IllegalArgumentException("valSize must be 32 or 64"); } - // initialize iterator - final Terms terms = reader.fields().terms(field); - if (terms != null) { - // TODO: NRQ by design relies on a specific sort - // order; I think UT8 or UTF16 would work (NRQ encodes - // to only ASCII). 
- termComp = terms.getTermComparator(); - actualEnum = terms.iterator(); - } else { - termComp = null; - actualEnum = null; - } - - // seek to first term - empty = next() == null; + termComp = getTermComparator(); } - - @Override - public float difference() { - return 1.0f; - } @Override - public boolean empty() { - return empty; - } - - @Override - protected TermRef setEnum(TermsEnum actualEnum, TermRef term) throws IOException { - throw new UnsupportedOperationException("not implemented"); - } - - @Override - public SeekStatus seek(TermRef term) throws IOException { - throw new UnsupportedOperationException("not implemented"); - } - - @Override - public SeekStatus seek(long ord) throws IOException { - throw new UnsupportedOperationException("not implemented"); - } - - @Override - public String field() { - return field; - } - - @Override - protected AcceptStatus accept(TermRef term) { - return (termComp.compare(term, currentUpperBound) <= 0) ? - AcceptStatus.YES : AcceptStatus.NO; - } - - @Override - public TermRef next() throws IOException { - if (actualEnum == null) { - return null; - } - - // try change to next term, if no such term exists, fall-through - // (we can only do this if the enum was already seeked) - if (currentUpperBound != null) { - final TermRef term = actualEnum.next(); - if (term != null && accept(term) == AcceptStatus.YES) { - return term; - } - } - - // if all above fails, we seek forward - while (rangeBounds.size() >= 2) { + protected final TermRef nextSeekTerm() throws IOException { + if (rangeBounds.size() >= 2) { assert rangeBounds.size() % 2 == 0; - final TermRef lowerBound = new TermRef(rangeBounds.removeFirst()); - assert currentUpperBound == null || termComp.compare(currentUpperBound, lowerBound) <= 0 : + this.currentLowerBound.copy(rangeBounds.removeFirst()); + assert termComp.compare(currentUpperBound, currentLowerBound) <= 0 : "The current upper bound must be <= the new lower bound"; - this.currentUpperBound = new 
TermRef(rangeBounds.removeFirst()); - - SeekStatus status = actualEnum.seek(lowerBound); - if (status == SeekStatus.END) { - return null; - } - - final TermRef term = actualEnum.term(); - if (accept(term) == AcceptStatus.YES) { - return term; - } + this.currentUpperBound.copy(rangeBounds.removeFirst()); + return currentLowerBound; } // no more sub-range enums available assert rangeBounds.size() == 0; return null; } + + @Override + protected AcceptStatus accept(TermRef term) { + return (currentUpperBound != null && termComp.compare(term, currentUpperBound) <= 0) ? + AcceptStatus.YES : AcceptStatus.NO_AND_SEEK; + } } Index: src/java/org/apache/lucene/search/PrefixQuery.java =================================================================== --- src/java/org/apache/lucene/search/PrefixQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/PrefixQuery.java (working copy) @@ -20,6 +20,8 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; @@ -34,20 +36,24 @@ /** Constructs a query for terms starting with prefix. */ public PrefixQuery(Term prefix) { + super(prefix.field()); this.prefix = prefix; } /** Returns the prefix of this query. */ public Term getPrefix() { return prefix; } - // deprecated - @Override + @Override @Deprecated protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new PrefixTermEnum(reader, prefix); } @Override - protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (prefix.text().length() == 0) { + final Terms terms = reader.fields().terms(getField()); + return (terms != null) ? 
terms.iterator() : new EmptyTermsEnum(); + } return new PrefixTermsEnum(reader, prefix); } @@ -55,8 +61,8 @@ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); - if (!prefix.field().equals(field)) { - buffer.append(prefix.field()); + if (!getField().equals(field)) { + buffer.append(getField()); buffer.append(":"); } buffer.append(prefix.text()); Index: src/java/org/apache/lucene/search/PrefixTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/PrefixTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/PrefixTermEnum.java (working copy) @@ -31,6 +31,7 @@ * * @deprecated Use {@link PrefixTermsEnum} instead. */ +@Deprecated public class PrefixTermEnum extends FilteredTermEnum { private final Term prefix; Index: src/java/org/apache/lucene/search/PrefixTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/PrefixTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/PrefixTermsEnum.java (working copy) @@ -21,50 +21,26 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermRef; /** * Subclass of FilteredTermEnum for enumerating all terms that match the * specified prefix filter term. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. - * + *

Term enumerations are always ordered by + * {@link #getTermComparator}. Each term in the enumeration is + * greater than all that precede it.

*/ public class PrefixTermsEnum extends FilteredTermsEnum { private final Term prefix; private final TermRef prefixRef; - private final boolean empty; public PrefixTermsEnum(IndexReader reader, Term prefix) throws IOException { + super(reader, prefix.field()); this.prefix = prefix; - Terms terms = reader.fields().terms(prefix.field()); - if (terms != null) { - prefixRef = new TermRef(prefix.text()); - empty = setEnum(terms.iterator(), prefixRef) == null; - } else { - empty = true; - prefixRef = null; - } + setInitialSeekTerm(prefixRef = new TermRef(prefix.text())); } - @Override - public String field() { - return prefix.field(); - } - - @Override - public float difference() { - return 1.0f; - } - - @Override - public boolean empty() { - return empty; - } - protected Term getPrefixTerm() { return prefix; } Index: src/java/org/apache/lucene/search/SingleTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/SingleTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/SingleTermEnum.java (working copy) @@ -29,6 +29,7 @@ * but want to preserve MultiTermQuery semantics such as * {@link MultiTermQuery#rewriteMethod}. */ +@Deprecated public class SingleTermEnum extends FilteredTermEnum { private Term singleTerm; private boolean endEnum = false; Index: src/java/org/apache/lucene/search/SingleTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/SingleTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/SingleTermsEnum.java (working copy) @@ -31,10 +31,8 @@ * but want to preserve MultiTermQuery semantics such as * {@link MultiTermQuery#rewriteMethod}. */ -public class SingleTermsEnum extends FilteredTermsEnum { - private final Term singleTerm; +public final class SingleTermsEnum extends FilteredTermsEnum { private final TermRef singleRef; - private final boolean empty; /** * Creates a new SingleTermsEnum. 
@@ -43,38 +41,14 @@ * if it exists. */ public SingleTermsEnum(IndexReader reader, Term singleTerm) throws IOException { - this.singleTerm = singleTerm; - Terms terms = reader.fields().terms(singleTerm.field()); - if (terms != null) { - singleRef = new TermRef(singleTerm.text()); - empty = setEnum(terms.iterator(), singleRef) == null; - } else { - empty = true; - singleRef = null; - } + super(reader, singleTerm.field()); + singleRef = new TermRef(singleTerm.text()); + setInitialSeekTerm(singleRef); } @Override protected AcceptStatus accept(TermRef term) { - if (term.equals(singleRef)) { - return AcceptStatus.YES; - } else { - return AcceptStatus.END; - } + return term.equals(singleRef) ? AcceptStatus.YES : AcceptStatus.END; } - - @Override - public float difference() { - return 1.0F; - } - - @Override - public boolean empty() { - return empty; - } - - @Override - public String field() { - return singleTerm.field(); - } + } Index: src/java/org/apache/lucene/search/TermRangeFilter.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeFilter.java (revision 887712) +++ src/java/org/apache/lucene/search/TermRangeFilter.java (working copy) @@ -87,9 +87,6 @@ public static TermRangeFilter More(String fieldName, String lowerTerm) { return new TermRangeFilter(fieldName, lowerTerm, null, true, false); } - - /** Returns the field name for this filter */ - public String getField() { return query.getField(); } /** Returns the lower value of this range filter */ public String getLowerTerm() { return query.getLowerTerm(); } Index: src/java/org/apache/lucene/search/TermRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/TermRangeQuery.java (working copy) @@ -21,6 +21,8 @@ import java.text.Collator; import org.apache.lucene.index.IndexReader; +import 
org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.Terms; import org.apache.lucene.util.ToStringUtils; /** @@ -41,7 +43,6 @@ private String lowerTerm; private String upperTerm; private Collator collator; - private String field; private boolean includeLower; private boolean includeUpper; @@ -104,7 +105,7 @@ */ public TermRangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, Collator collator) { - this.field = field; + super(field); this.lowerTerm = lowerTerm; this.upperTerm = upperTerm; this.includeLower = includeLower; @@ -112,9 +113,6 @@ this.collator = collator; } - /** Returns the field name for this query */ - public String getField() { return field; } - /** Returns the lower value of this range query */ public String getLowerTerm() { return lowerTerm; } @@ -130,24 +128,30 @@ /** Returns the collator used to determine range inclusion, if any. */ public Collator getCollator() { return collator; } - @Override + @Override @Deprecated protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { return new TermRangeTermEnum(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); } - public String field() { - return field; - } - @Override - protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { + return new EmptyTermsEnum(); + } + if ((lowerTerm == null || (collator == null && includeLower && "".equals(lowerTerm))) && upperTerm == null) { + final Terms terms = reader.fields().terms(field); + return (terms != null) ? 
terms.iterator() : new EmptyTermsEnum(); + } return new TermRangeTermsEnum(reader, field, - lowerTerm, upperTerm, - includeLower, includeUpper, - collator); + lowerTerm, upperTerm, includeLower, includeUpper, collator); } + /** @deprecated */ + public String field() { + return getField(); + } + /** Prints a user-readable version of this query. */ @Override public String toString(String field) { @@ -170,7 +174,6 @@ final int prime = 31; int result = super.hashCode(); result = prime * result + ((collator == null) ? 0 : collator.hashCode()); - result = prime * result + ((field == null) ? 0 : field.hashCode()); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + ((lowerTerm == null) ? 0 : lowerTerm.hashCode()); @@ -192,11 +195,6 @@ return false; } else if (!collator.equals(other.collator)) return false; - if (field == null) { - if (other.field != null) - return false; - } else if (!field.equals(other.field)) - return false; if (includeLower != other.includeLower) return false; if (includeUpper != other.includeUpper) Index: src/java/org/apache/lucene/search/TermRangeTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/TermRangeTermEnum.java (working copy) @@ -33,6 +33,7 @@ * @since 2.9 * @deprecated Please switch to {@link TermRangeTermsEnum} */ +@Deprecated public class TermRangeTermEnum extends FilteredTermEnum { private Collator collator = null; Index: src/java/org/apache/lucene/search/TermRangeTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/TermRangeTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/TermRangeTermsEnum.java (working copy) @@ -22,28 +22,24 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermRef; -import 
org.apache.lucene.index.Terms; -//import org.apache.lucene.index.Term; import org.apache.lucene.util.StringHelper; /** * Subclass of FilteredTermEnum for enumerating all terms that match the * specified range parameters. - *

- * Term enumerations are always ordered by Term.compareTo(). Each term in - * the enumeration is greater than all that precede it. + *

Term enumerations are always ordered by + * {@link #getTermComparator}. Each term in the enumeration is + * greater than all that precede it.

*/ public class TermRangeTermsEnum extends FilteredTermsEnum { private Collator collator; - private String field; private String upperTermText; private String lowerTermText; private boolean includeLower; private boolean includeUpper; final private TermRef lowerTermRef; final private TermRef upperTermRef; - private final boolean empty; private final TermRef.Comparator termComp; /** @@ -75,12 +71,12 @@ */ public TermRangeTermsEnum(IndexReader reader, String field, String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper, Collator collator) throws IOException { + super(reader, field); this.collator = collator; this.upperTermText = upperTermText; this.lowerTermText = lowerTermText; this.includeLower = includeLower; this.includeUpper = includeUpper; - this.field = StringHelper.intern(field); // do a little bit of normalization... // open ended range queries should always be inclusive. @@ -97,42 +93,16 @@ upperTermRef = new TermRef(upperTermText); } - String startTermText = collator == null ? this.lowerTermText : ""; - Terms terms = reader.fields().terms(field); - - if (terms != null) { - termComp = terms.getTermComparator(); - final boolean foundFirstTerm = setEnum(terms.iterator(), new TermRef(startTermText)) != null; - - if (foundFirstTerm && collator == null && !this.includeLower && term().termEquals(lowerTermRef)) { - empty = next() == null; - } else { - empty = !foundFirstTerm; - } - } else { - empty = true; - termComp = null; - } + TermRef startTermRef = (collator == null) ? 
lowerTermRef : new TermRef(""); + setInitialSeekTerm(startTermRef); + termComp = getTermComparator(); } @Override - public float difference() { - return 1.0f; - } - - @Override - public boolean empty() { - return empty; - } - - @Override - public String field() { - return field; - } - - @Override protected AcceptStatus accept(TermRef term) { if (collator == null) { + if (!this.includeLower && term.equals(lowerTermRef)) + return AcceptStatus.NO; // Use this field's default sort ordering if (upperTermRef != null) { final int cmp = termComp.compare(upperTermRef, term); Index: src/java/org/apache/lucene/search/WildcardQuery.java =================================================================== --- src/java/org/apache/lucene/search/WildcardQuery.java (revision 887712) +++ src/java/org/apache/lucene/search/WildcardQuery.java (working copy) @@ -19,6 +19,8 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -41,6 +43,7 @@ protected Term term; public WildcardQuery(Term term) { + super(term.field()); this.term = term; String text = term.text(); this.termContainsWildcard = (text.indexOf('*') != -1) @@ -51,16 +54,29 @@ } @Override - protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + if (termIsPrefix) { + final String text = getTerm().text(); + final Term t = getTerm().createTerm(text.substring(0,text.length()-1)); + if (t.text().length() == 0) { + final Terms terms = reader.fields().terms(getField()); + return (terms != null) ? 
terms.iterator() : new EmptyTermsEnum(); + } + return new PrefixTermsEnum(reader, t); + } if (termContainsWildcard) return new WildcardTermsEnum(reader, getTerm()); else return new SingleTermsEnum(reader, getTerm()); } - // @deprecated see getTermsEnum - @Override + @Override @Deprecated protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { + if (termIsPrefix) { + final String text = getTerm().text(); + final Term t = getTerm().createTerm(text.substring(0,text.length()-1)); + return new PrefixTermEnum(reader, t); + } if (termContainsWildcard) return new WildcardTermEnum(reader, getTerm()); else @@ -74,25 +90,12 @@ return term; } - @Override - public Query rewrite(IndexReader reader) throws IOException { - if (termIsPrefix) { - MultiTermQuery rewritten = new PrefixQuery(term.createTerm(term.text() - .substring(0, term.text().indexOf('*')))); - rewritten.setBoost(getBoost()); - rewritten.setRewriteMethod(getRewriteMethod()); - return rewritten; - } else { - return super.rewrite(reader); - } - } - /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); - if (!term.field().equals(field)) { - buffer.append(term.field()); + if (!getField().equals(field)) { + buffer.append(getField()); buffer.append(":"); } buffer.append(term.text()); Index: src/java/org/apache/lucene/search/WildcardTermEnum.java =================================================================== --- src/java/org/apache/lucene/search/WildcardTermEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/WildcardTermEnum.java (working copy) @@ -30,6 +30,7 @@ * the enumeration is greater than all that precede it. * @deprecated Please use {@link WildcardTermsEnum} instead. 
*/ +@Deprecated public class WildcardTermEnum extends FilteredTermEnum { final Term searchTerm; final String field; Index: src/java/org/apache/lucene/search/WildcardTermsEnum.java =================================================================== --- src/java/org/apache/lucene/search/WildcardTermsEnum.java (revision 887712) +++ src/java/org/apache/lucene/search/WildcardTermsEnum.java (working copy) @@ -35,11 +35,9 @@ */ public class WildcardTermsEnum extends FilteredTermsEnum { final Term searchTerm; - final String field; final String text; final String pre; final int preLen; - private final boolean empty; private final TermRef preTermRef; /** @@ -49,9 +47,8 @@ * valid term if such a term exists. */ public WildcardTermsEnum(IndexReader reader, Term term) throws IOException { - super(); - searchTerm = term; - field = searchTerm.field(); + super(reader, term.field()); + this.searchTerm = term; final String searchTermText = searchTerm.text(); final int sidx = searchTermText.indexOf(WILDCARD_STRING); @@ -67,22 +64,10 @@ preLen = pre.length(); text = searchTermText.substring(preLen); - preTermRef = new TermRef(pre); - - Terms terms = reader.fields().terms(searchTerm.field()); - if (terms != null) { - empty = setEnum(terms.iterator(), preTermRef) == null; - } else { - empty = true; - } + setInitialSeekTerm(preTermRef = new TermRef(pre)); } @Override - public String field() { - return searchTerm.field(); - } - - @Override protected final AcceptStatus accept(TermRef term) { if (term.startsWith(preTermRef)) { // TODO: would be better, but trickier, to not have to @@ -99,16 +84,6 @@ } } - @Override - public float difference() { - return 1.0f; - } - - @Override - public final boolean empty() { - return empty; - } - /******************************************** * String equality with support for wildcards ********************************************/ Index: src/test/org/apache/lucene/search/TestNumericRangeQuery32.java 
=================================================================== --- src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 887712) +++ src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.TermRef; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; @@ -442,20 +443,18 @@ private void testEnum(int lower, int upper) throws Exception { NumericRangeQuery q = NumericRangeQuery.newIntRange("field4", 4, lower, upper, true, true); - FilteredTermsEnum termEnum = q.getTermsEnum(searcher.getIndexReader()); + TermsEnum termEnum = q.getTermsEnum(searcher.getIndexReader()); int count = 0; - if (!termEnum.empty()) { - do { - final TermRef t = termEnum.term(); - if (t != null) { - final int val = NumericUtils.prefixCodedToInt(t.toString()); - assertTrue("value not in bounds " + val + " >= " + lower + " && " - + val + " <= " + upper, val >= lower && val <= upper); - count++; - } else - break; - } while (termEnum.next() != null); - } + while (termEnum.next() != null) { + final TermRef t = termEnum.term(); + if (t != null) { + final int val = NumericUtils.prefixCodedToInt(t.toString()); + assertTrue("value not in bounds " + val + " >= " + lower + " && " + + val + " <= " + upper, val >= lower && val <= upper); + count++; + } else + break; + } assertNull(termEnum.next()); System.out.println("TermEnum on 'field4' for range [" + lower + "," + upper + "] contained " + count + " terms."); Index: src/test/org/apache/lucene/search/TestPrefixQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestPrefixQuery.java (revision 887712) +++ src/test/org/apache/lucene/search/TestPrefixQuery.java (working copy) @@ -52,5 
+52,10 @@ query = new PrefixQuery(new Term("category", "/Computers/Mac")); hits = searcher.search(query, null, 1000).scoreDocs; assertEquals("One in /Computers/Mac", 1, hits.length); + + query = new PrefixQuery(new Term("category", "")); + assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum); + hits = searcher.search(query, null, 1000).scoreDocs; + assertEquals("everything", 3, hits.length); } } Index: src/test/org/apache/lucene/search/TestTermRangeQuery.java =================================================================== --- src/test/org/apache/lucene/search/TestTermRangeQuery.java (revision 887712) +++ src/test/org/apache/lucene/search/TestTermRangeQuery.java (working copy) @@ -80,6 +80,25 @@ assertEquals("C added - A, B, C in range", 3, hits.length); searcher.close(); } + + public void testAllDocs() throws Exception { + initializeIndex(new String[]{"A", "B", "C", "D"}); + IndexSearcher searcher = new IndexSearcher(dir, true); + TermRangeQuery query = new TermRangeQuery("content", null, null, true, true); + assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); + assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); + query = new TermRangeQuery("content", null, null, false, false); + assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); + assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); + query = new TermRangeQuery("content", "", null, true, false); + assertFalse(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); + assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length); + // and now another one + query = new TermRangeQuery("content", "B", null, true, false); + assertTrue(query.getTermsEnum(searcher.getIndexReader()) instanceof TermRangeTermsEnum); + assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length); + searcher.close(); + } public void testEqualsHashcode() { Query
query = new TermRangeQuery("content", "A", "C", true, true); Index: src/test/org/apache/lucene/search/TestWildcard.java =================================================================== --- src/test/org/apache/lucene/search/TestWildcard.java (revision 887712) +++ src/test/org/apache/lucene/search/TestWildcard.java (working copy) @@ -120,30 +120,14 @@ MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*")); assertMatches(searcher, wq, 2); - MultiTermQuery expected = new PrefixQuery(new Term("field", "prefix")); - wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - wq.setBoost(0.1F); - expected.setRewriteMethod(wq.getRewriteMethod()); - expected.setBoost(wq.getBoost()); - assertEquals(searcher.rewrite(expected), searcher.rewrite(wq)); + assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum); + assertTrue(wq.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum); - wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE); - wq.setBoost(0.2F); - expected.setRewriteMethod(wq.getRewriteMethod()); - expected.setBoost(wq.getBoost()); - assertEquals(searcher.rewrite(expected), searcher.rewrite(wq)); - - wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); - wq.setBoost(0.3F); - expected.setRewriteMethod(wq.getRewriteMethod()); - expected.setBoost(wq.getBoost()); - assertEquals(searcher.rewrite(expected), searcher.rewrite(wq)); - - wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); - wq.setBoost(0.4F); - expected.setRewriteMethod(wq.getRewriteMethod()); - expected.setBoost(wq.getBoost()); - assertEquals(searcher.rewrite(expected), searcher.rewrite(wq)); + wq = new WildcardQuery(new Term("field", "*")); + assertMatches(searcher, wq, 2); + assertTrue(wq.getEnum(searcher.getIndexReader()) instanceof PrefixTermEnum); + assertFalse(wq.getTermsEnum(searcher.getIndexReader()) instanceof PrefixTermsEnum); + assertFalse(wq.getTermsEnum(searcher.getIndexReader()) 
instanceof WildcardTermsEnum); } /**