diff -r 4fd904dd1987 lucene/CHANGES.txt --- a/lucene/CHANGES.txt Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/CHANGES.txt Mon Sep 27 12:07:18 2010 -0400 @@ -108,6 +108,9 @@ * LUCENE-2600: Remove IndexReader.isDeleted in favor of IndexReader.getDeletedDocs(). (Mike McCandless) +* LUCENE-XXXX: MultiTermQuery.TermCollector.collect now accepts the + TermsEnum as well. (Robert Muir, Mike McCandless) + Changes in Runtime Behavior * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit diff -r 4fd904dd1987 lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Mon Sep 27 12:07:18 2010 -0400 @@ -91,6 +91,10 @@ } @Override + public void cacheCurrentTerm() { + } + + @Override public BytesRef term() { return br; } diff -r 4fd904dd1987 lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Sep 27 12:07:18 2010 -0400 @@ -874,6 +874,10 @@ } @Override + public void cacheCurrentTerm() { + } + + @Override public long ord() { return termUpto; } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/index/FilterIndexReader.java --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Mon Sep 27 12:07:18 2010 -0400 @@ -130,6 +130,11 @@ } @Override + public void cacheCurrentTerm() throws IOException { + in.cacheCurrentTerm(); + } + + @Override public SeekStatus seek(long ord) throws IOException { return in.seek(ord); } diff -r 4fd904dd1987 
lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java --- a/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Mon Sep 27 12:07:18 2010 -0400 @@ -91,6 +91,13 @@ } @Override + public void cacheCurrentTerm() throws IOException { + for(int i=0;i getComparator() { return termComp; } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/index/TermsEnum.java --- a/lucene/src/java/org/apache/lucene/index/TermsEnum.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/TermsEnum.java Mon Sep 27 12:07:18 2010 -0400 @@ -123,6 +123,10 @@ * instance & reuse it. */ public abstract Comparator getComparator() throws IOException; + /** Optional optimization hint: informs the codec that the + * current term is likely to be re-seek'd-to soon. */ + public abstract void cacheCurrentTerm() throws IOException; + /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} *

Please note: This enum should be unmodifiable, @@ -138,6 +142,9 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override + public void cacheCurrentTerm() {} + + @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java --- a/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java Mon Sep 27 12:07:18 2010 -0400 @@ -82,6 +82,11 @@ public FieldAndTerm() { } + public FieldAndTerm(String field, BytesRef term) { + this.field = field; + this.term = new BytesRef(term); + } + public FieldAndTerm(FieldAndTerm other) { field = other.field; term = new BytesRef(other.term); @@ -297,6 +302,14 @@ return termComp; } + @Override + public void cacheCurrentTerm() { + TermState stateCopy = (TermState) state.clone(); + stateCopy.filePointer = in.getFilePointer(); + termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term), + stateCopy); + } + /** Seeks until the first term that's >= the provided * text; returns SeekStatus.FOUND if the exact term * is found, SeekStatus.NOT_FOUND if a different term diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java --- a/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Sep 27 12:07:18 2010 -0400 @@ -727,6 +727,11 @@ } @Override + public void cacheCurrentTerm() throws IOException { + // nocommit todo + } + + @Override public SeekStatus seek(long ord) throws IOException { throw new UnsupportedOperationException(); } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java --- 
a/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Mon Sep 27 12:07:18 2010 -0400 @@ -154,6 +154,10 @@ } @Override + public void cacheCurrentTerm() { + } + + @Override public BytesRef next() throws IOException { assert !ended; readLine(in, scratch); diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java --- a/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java Mon Sep 27 12:07:18 2010 -0400 @@ -46,7 +46,6 @@ private BytesRef initialSeekTerm = null; private boolean doSeek = true; private BytesRef actualTerm = null; - private boolean useTermsCache = false; private final TermsEnum tenum; @@ -116,16 +115,6 @@ return t; } - /** Expert: enable or disable the terms cache when seeking. */ - protected final void setUseTermsCache(boolean useTermsCache) { - this.useTermsCache = useTermsCache; - } - - /** Expert: enable or disable the terms cache when seeking. */ - protected final boolean getUseTermsCache() { - return useTermsCache; - } - /** * Returns the related attributes, the returned {@link AttributeSource} * is shared with the delegate {@code TermsEnum}. 
@@ -188,6 +177,11 @@ assert tenum != null; return tenum.docsAndPositions(bits, reuse); } + + @Override + public void cacheCurrentTerm() throws IOException { + tenum.cacheCurrentTerm(); + } @Override public BytesRef next() throws IOException { @@ -200,7 +194,7 @@ final BytesRef t = nextSeekTerm(actualTerm); // Make sure we always seek forward: assert actualTerm == null || t == null || getComparator().compare(t, actualTerm) > 0: "curTerm=" + actualTerm + " seekTerm=" + t; - if (t == null || tenum.seek(t, useTermsCache) == SeekStatus.END) { + if (t == null || tenum.seek(t, false) == SeekStatus.END) { // no more terms to seek to or enum exhausted return null; } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java --- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java Mon Sep 27 12:07:18 2010 -0400 @@ -216,6 +216,11 @@ } @Override + public void cacheCurrentTerm() throws IOException { + actualEnum.cacheCurrentTerm(); + } + + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return actualEnum.docs(skipDocs, reuse); } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/MultiTermQuery.java --- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Mon Sep 27 12:07:18 2010 -0400 @@ -201,7 +201,8 @@ int count = 0; BytesRef bytes; while ((bytes = termsEnum.next()) != null) { - if (collector.collect(bytes, boostAtt.getBoost())) { + termsEnum.cacheCurrentTerm(); + if (collector.collect(termsEnum, bytes, boostAtt.getBoost())) { count++; } else { break; @@ -215,7 +216,7 @@ private BoostAttribute boostAtt = null; /** return false to stop collecting */ - public abstract boolean collect(BytesRef bytes, float boost) throws IOException; + public abstract boolean collect(TermsEnum termsEnum, BytesRef 
bytes, float boost) throws IOException; /** set the minimum boost as a hint for the term producer */ protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) { @@ -231,9 +232,10 @@ final BooleanQuery result = new BooleanQuery(true); final Term placeholderTerm = new Term(query.field); query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() { - public boolean collect(BytesRef bytes, float boost) { + @Override + public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) { // add new TQ, we must clone the term, else it may get overwritten! - TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes))); + TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)), termsEnum.docFreq()); tq.setBoost(query.getBoost() * boost); // set the boost result.add(tq, BooleanClause.Occur.SHOULD); // add to query return true; @@ -298,10 +300,12 @@ final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); final PriorityQueue stQueue = new PriorityQueue(); collectTerms(reader, query, new TermCollector() { - public boolean collect(BytesRef bytes, float boost) { + @Override + public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) { // ignore uncompetetive hits if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost) return true; + // nocommit -- stick docFreq into queue too? // add new entry in PQ, we must clone the term, else it may get overwritten! 
st.bytes.copy(bytes); st.boost = boost; @@ -567,18 +571,14 @@ this.termCountLimit = termCountLimit; } - public boolean collect(BytesRef bytes, float boost) throws IOException { + public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException { termCount++; if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; } pendingTerms.copyUsingLengthPrefix(bytes); - // Loading the TermInfo from the terms dict here - // should not be costly, because 1) the - // query/filter will load the TermInfo when it - // runs, and 2) the terms dict has a cache: - docVisitCount += reader.docFreq(field, bytes); + docVisitCount += termsEnum.docFreq(); return true; } diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/Similarity.java --- a/lucene/src/java/org/apache/lucene/search/Similarity.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/Similarity.java Mon Sep 27 12:07:18 2010 -0400 @@ -741,7 +741,7 @@ * The default implementation uses: * *

-   * idf(searcher.docFreq(term), searcher.maxDoc());
+   * idf(docFreq, searcher.maxDoc());
    * 
* * Note that {@link Searcher#maxDoc()} is used instead of @@ -752,11 +752,12 @@ * * @param term the term in question * @param searcher the document collection being searched + * @param docFreq externally computed docFreq for this term * @return an IDFExplain object that includes both an idf score factor and an explanation for the term. * @throws IOException */ - public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException { + public IDFExplanation idfExplain(final Term term, final Searcher searcher, int docFreq) throws IOException { - final int df = searcher.docFreq(term); + final int df = docFreq; final int max = searcher.maxDoc(); final float idf = idf(df, max); @@ -773,6 +774,15 @@ } /** + * This method forwards to {@link + * #idfExplain(Term,Searcher,int)} by passing + * searcher.docFreq(term) as the docFreq. + */ + public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException { + return idfExplain(term, searcher, searcher.docFreq(term)); + } + + /** * Computes a score factor for a phrase. * *

diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/TermQuery.java --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java Mon Sep 27 12:07:18 2010 -0400 @@ -30,7 +30,8 @@ This may be combined with other terms with a {@link BooleanQuery}. */ public class TermQuery extends Query { - private Term term; + private final Term term; + private final int docFreq; private class TermWeight extends Weight { private final Similarity similarity; @@ -43,7 +44,11 @@ public TermWeight(Searcher searcher) throws IOException { this.similarity = getSimilarity(searcher); - idfExp = similarity.idfExplain(term, searcher); + if (docFreq != -1) { + idfExp = similarity.idfExplain(term, searcher, docFreq); + } else { + idfExp = similarity.idfExplain(term, searcher); + } idf = idfExp.getIdf(); } @@ -160,7 +165,15 @@ /** Constructs a query for the term t. */ public TermQuery(Term t) { + this(t, -1); + } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. */ + public TermQuery(Term t, int docFreq) { term = t; + this.docFreq = docFreq; } /** Returns the term of this query. 
*/ diff -r 4fd904dd1987 lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java --- a/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java Mon Sep 27 12:07:18 2010 -0400 @@ -285,6 +285,11 @@ } @Override + public void cacheCurrentTerm() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public BytesRef term() throws IOException { return term; } diff -r 4fd904dd1987 lucene/src/test/org/apache/lucene/TestExternalCodecs.java --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Mon Sep 27 15:32:21 2010 +0000 +++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Mon Sep 27 12:07:18 2010 -0400 @@ -320,6 +320,10 @@ } @Override + public void cacheCurrentTerm() { + } + + @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs); } diff -r 4fd904dd1987 solr/src/java/org/apache/solr/request/UnInvertedField.java --- a/solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Sep 27 15:32:21 2010 +0000 +++ b/solr/src/java/org/apache/solr/request/UnInvertedField.java Mon Sep 27 12:07:18 2010 -0400 @@ -1002,6 +1002,11 @@ return tenum.docFreq(); } + @Override + public void cacheCurrentTerm() { + throw new UnsupportedOperationException(); + } + public BytesRef skipTo(BytesRef target) throws IOException { // already here