Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1058306) +++ lucene/CHANGES.txt (working copy) @@ -359,6 +359,11 @@ not seek backwards when a sub-range has no terms. It now only seeks when the current term is less than the next sub-range's lower end. (Uwe Schindler, Mike McCandless) + +* LUCENE-2694: Optimize MultiTermQuery to be single pass for Term lookups. + MultiTermQuery now stores TermState per leaf reader during rewrite to re- + seek the term dictionary in TermQuery / TermWeight. + (Simon Willnauer, Mike McCandless, Robert Muir) Documentation Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (revision 1058306) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (working copy) @@ -18,10 +18,15 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.codecs.PrefixCodedTermState; + +import java.io.IOException; import java.util.Arrays; import java.util.Comparator; @@ -91,10 +96,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public BytesRef term() { return br; } @@ -129,5 +130,18 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public TermState termState() throws IOException { + final OrdTermState state = new OrdTermState(); + state.ord = upto - start; + return state; + } + + @Override + public SeekStatus 
seek(BytesRef term, TermState state) throws IOException { + assert state != null && state instanceof OrdTermState; + return seek(((OrdTermState)state).ord); // just use the ord for simplicity + } } Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1058306) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -39,6 +39,8 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.FieldsEnum; @@ -884,10 +886,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public long ord() { return termUpto; } @@ -917,8 +915,21 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } - } + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert state != null; + return this.seek(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState ts = new OrdTermState(); + ts.ord = termUpto; + return ts; + } + } + private class MemoryDocsEnum extends DocsEnum { private ArrayIntList positions; private boolean hasNext; Index: lucene/src/java/org/apache/lucene/index/BufferedDeletes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (working copy) @@ -372,7 +372,6 @@ Query query = entry.getKey(); int limit = entry.getValue().intValue(); Weight weight = 
query.weight(searcher); - Scorer scorer = weight.scorer(readerContext, true, false); if (scorer != null) { while(true) { Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -131,11 +131,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - in.cacheCurrentTerm(); - } - - @Override public SeekStatus seek(long ord) throws IOException { return in.seek(ord); } @@ -174,6 +169,16 @@ public Comparator getComparator() throws IOException { return in.getComparator(); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + return in.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return in.termState(); + } } /** Base class for filtering {@link DocsEnum} implementations. */ Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -1070,7 +1070,48 @@ return null; } } + + /** + * Returns {@link DocsEnum} for the specified field and + * {@link TermState}. 
This may return null, if either the field or the term + * does not exists or the {@link TermState} is invalid for the underlying + * implementation.*/ + public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, term, state, null); + } else { + return null; + } + } + + /** + * Returns {@link DocsAndPositionsEnum} for the specified field and + * {@link TermState}. This may return null, if either the field or the term + * does not exists, the {@link TermState} is invalid for the underlying + * implementation, or positions were not stored for this term.*/ + public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, state, null); + } else { + return null; + } + } + /** Deletes the document numbered docNum. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations. 
* Attempts to read its field with the {@link #document} Index: lucene/src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiReader.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Collection; -import java.util.HashMap; import java.util.Map; import org.apache.lucene.document.Document; Index: lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -91,13 +91,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - for(int i=0;i getComparator() { return termComp; } Index: lucene/src/java/org/apache/lucene/index/OrdTermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/OrdTermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/OrdTermState.java (revision 0) @@ -0,0 +1,33 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An ordinal based {@link TermState} + * + * @lucene.experimental + */ +public class OrdTermState extends TermState { + public long ord; + + @Override + public void copyFrom(TermState other) { + assert other instanceof OrdTermState : "can not copy from " + other.getClass().getName(); + this.ord = ((OrdTermState) other).ord; + } +} Property changes on: lucene/src/java/org/apache/lucene/index/OrdTermState.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/java/org/apache/lucene/index/TermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/TermState.java (revision 0) @@ -0,0 +1,47 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Encapsulates all required internal state to position the associated + * {@link TermsEnum} without re-seeking. 
+ * + * @see TermsEnum#seek(org.apache.lucene.util.BytesRef, TermState) + * @see TermsEnum#termState() + * @lucene.experimental + */ +public abstract class TermState implements Cloneable { + + /** + * Copies the content of the given {@link TermState} to this instance + * + * @param other + * the TermState to copy + */ + public abstract void copyFrom(TermState other); + + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException cnse) { + // should not happen + throw new RuntimeException(cnse); + } + } +} \ No newline at end of file Property changes on: lucene/src/java/org/apache/lucene/index/TermState.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/java/org/apache/lucene/index/Terms.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Terms.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/Terms.java (working copy) @@ -80,11 +80,57 @@ } } + /** + * Expert: Get {@link DocsEnum} for the specified {@link TermState}. + * This method may return null if the term does not exist. + * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docs(skipDocs, reuse); + } else { + return null; + } + } + + /** + * Get {@link DocsEnum} for the specified {@link TermState}. This + * method will may return null if the term does not exists, or positions were + * not indexed. 
+ * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docsAndPositions(skipDocs, reuse); + } else { + return null; + } + } + public long getUniqueTermCount() throws IOException { throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()"); } - protected TermsEnum getThreadTermsEnum() throws IOException { + /** + * Returns a thread-private {@link TermsEnum} instance. Obtaining + * {@link TermsEnum} from this method might be more efficient than using + * {@link #iterator()} directly since this method doesn't necessarily create a + * new {@link TermsEnum} instance. + *

+ * NOTE: {@link TermsEnum} instances obtained from this method must not be + * shared across threads. The enum should only be used within a local context + * where other threads can't access it. + * + * @return a thread-private {@link TermsEnum} instance + * @throws IOException + * if an IOException occurs + * @lucene.internal + */ + public TermsEnum getThreadTermsEnum() throws IOException { TermsEnum termsEnum = threadEnums.get(); if (termsEnum == null) { termsEnum = iterator(); Index: lucene/src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsEnum.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -73,7 +73,34 @@ * may be before or after the current ord. See {@link * #seek(BytesRef)}. */ public abstract SeekStatus seek(long ord) throws IOException; - + + /** + * Expert: Seeks a specific position by {@link TermState} previously obtained + * from {@link #termState()}. Callers should maintain the {@link TermState} to + * use this method. Low-level implementations may position the TermsEnum + * without re-seeking the term dictionary. + *

+ * Seeking by {@link TermState} should only be used iff the enum the state was + * obtained from and the enum the state is used for seeking are obtained from + * the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can + * leave the enum in an undefined state. + *

+ * NOTE: Using this method with an incompatible {@link TermState} might leave + * this {@link TermsEnum} in an undefined state. On a segment level + * {@link TermState} instances are compatible only if the source and the + * target {@link TermsEnum} operate on the same field. If operating on segment + * level, TermState instances must not be used across segments. + *

+ * NOTE: A seek by {@link TermState} might not restore the + * {@link AttributeSource}'s state. {@link AttributeSource} states must be + * maintained separately if this method is used. + * @param term the term the TermState corresponds to + * @param state the {@link TermState} + * */ + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + return seek(term); + } + /** Increments the enumeration to the next element. * Returns the resulting term, or null if the end was * hit. The returned BytesRef may be re-used across calls @@ -98,7 +125,7 @@ * first time, after next() returns null or seek returns * {@link SeekStatus#END}.*/ public abstract int docFreq(); - + /** Get {@link DocsEnum} for the current term. Do not * call this before calling {@link #next} or {@link * #seek} for the first time. This method will not @@ -116,6 +143,25 @@ * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + /** + * Expert: Returns the TermsEnums internal state to position the TermsEnum + * without re-seeking the term dictionary. + *

+ * NOTE: A seek by {@link TermState} might not capture the + * {@link AttributeSource}'s state. Callers must maintain the + * {@link AttributeSource} states separately + * + * @see TermState + * @see #seek(BytesRef, TermState) + */ + public TermState termState() throws IOException { + return new TermState() { + @Override + public void copyFrom(TermState other) { + } + }; + } + /** Return the {@link BytesRef} Comparator used to sort * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this @@ -123,10 +169,6 @@ * instance & reuse it. */ public abstract Comparator getComparator() throws IOException; - /** Optional optimization hint: informs the codec that the - * current term is likely to be re-seek'd-to soon. */ - public abstract void cacheCurrentTerm() throws IOException; - /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} *

Please note: This enum should be unmodifiable, @@ -142,9 +184,6 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override - public void cacheCurrentTerm() {} - - @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); } @@ -183,5 +222,15 @@ public synchronized AttributeSource attributes() { return super.attributes(); } + + @Override + public TermState termState() throws IOException { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; } Index: lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (working copy) @@ -42,17 +42,17 @@ public abstract void init(IndexInput termsIn) throws IOException; /** Return a newly created empty TermState */ - public abstract TermState newTermState() throws IOException; + public abstract PrefixCodedTermState newTermState() throws IOException; - public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState state, boolean isIndexTerm) throws IOException; + public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException; /** Must fully consume state, since after this call that * TermState may be reused. */ - public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException; + public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException; /** Must fully consume state, since after this call that * TermState may be reused. 
*/ - public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; public abstract void close() throws IOException; } Index: lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java (revision 0) @@ -0,0 +1,45 @@ +package org.apache.lucene.index.codecs; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; + +/** + * Holds all state required for {@link PostingsReaderBase} + * to produce a {@link DocsEnum} without re-seeking the + * terms dict. 
+ */ +public class PrefixCodedTermState extends OrdTermState { + public int docFreq; // how many docs have this term + public long filePointer; // fp into the terms dict primary file (_X.tis) + + @Override + public void copyFrom(TermState _other) { + assert _other instanceof PrefixCodedTermState : "can not copy from " + _other.getClass().getName(); + PrefixCodedTermState other = (PrefixCodedTermState) _other; + super.copyFrom(_other); + filePointer = other.filePointer; + docFreq = other.docFreq; + } + + @Override + public String toString() { + return super.toString() + "[ord=" + ord + ", tis.filePointer=" + filePointer + "]"; + } + +} Property changes on: lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; @@ -68,7 +69,7 @@ private final Comparator termComp; // Caches the most recently looked-up field + terms: - private final DoubleBarrelLRUCache termsCache; + private final DoubleBarrelLRUCache termsCache; // Reads the terms index private TermsIndexReaderBase indexReader; @@ -84,11 +85,6 @@ public FieldAndTerm() { } - public FieldAndTerm(String field, BytesRef term) { - this.field = field; - this.term = new BytesRef(term); - } - public 
FieldAndTerm(FieldAndTerm other) { field = other.field; term = new BytesRef(other.term); @@ -116,7 +112,7 @@ throws IOException { this.postingsReader = postingsReader; - termsCache = new DoubleBarrelLRUCache(termsCacheSize); + termsCache = new DoubleBarrelLRUCache(termsCacheSize); this.termComp = termComp; @@ -278,10 +274,10 @@ } // Iterates through terms in this field, not supporting ord() - private class SegmentTermsEnum extends TermsEnum { + private final class SegmentTermsEnum extends TermsEnum { private final IndexInput in; private final DeltaBytesReader bytesReader; - private final TermState state; + private final PrefixCodedTermState state; private boolean seekPending; private final FieldAndTerm fieldTerm = new FieldAndTerm(); private final TermsIndexReaderBase.FieldIndexEnum indexEnum; @@ -307,14 +303,6 @@ return termComp; } - @Override - public void cacheCurrentTerm() { - TermState stateCopy = (TermState) state.clone(); - stateCopy.filePointer = in.getFilePointer(); - termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term), - stateCopy); - } - // called only from assert private boolean first; private int indexTermCount; @@ -342,7 +330,7 @@ * is found, SeekStatus.NOT_FOUND if a different term * was found, SeekStatus.END if we hit EOF */ @Override - public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { + public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException { if (indexEnum == null) { throw new IllegalStateException("terms index was not loaded"); @@ -357,9 +345,8 @@ cachedState = termsCache.get(fieldTerm); if (cachedState != null) { state.copyFrom(cachedState); - seekPending = true; + setTermState(term, state); positioned = false; - bytesReader.term.copy(term); //System.out.println(" cached!"); return SeekStatus.FOUND; } @@ -439,12 +426,7 @@ if (cmp == 0) { // Done! 
if (useCache) { - // Store in cache - FieldAndTerm entryKey = new FieldAndTerm(fieldTerm); - cachedState = (TermState) state.clone(); - // this is fp after current term - cachedState.filePointer = in.getFilePointer(); - termsCache.put(entryKey, cachedState); + cacheTerm(fieldTerm); } return SeekStatus.FOUND; @@ -464,6 +446,23 @@ return SeekStatus.END; } + private final void setTermState(BytesRef term, final TermState termState) { + assert termState != null && termState instanceof PrefixCodedTermState; + state.copyFrom(termState); + seekPending = true; + bytesReader.term.copy(term); + } + + private final void cacheTerm(FieldAndTerm other) { + // Store in cache + final FieldAndTerm entryKey = new FieldAndTerm(other); + final PrefixCodedTermState cachedState = (PrefixCodedTermState) state.clone(); + // this is fp after current term + cachedState.filePointer = in.getFilePointer(); + termsCache.put(entryKey, cachedState); + } + + @Override public BytesRef term() { return bytesReader.term; @@ -498,7 +497,9 @@ postingsReader.readTerm(in, fieldInfo, state, isIndexTerm); - state.ord++; + if (doOrd) { + state.ord++; + } positioned = true; //System.out.println("te.next term=" + bytesReader.term.utf8ToString()); @@ -512,7 +513,7 @@ @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); assert docsEnum != null; return docsEnum; } @@ -527,6 +528,23 @@ } @Override + public SeekStatus seek(BytesRef term, TermState otherState) throws IOException { + assert otherState != null && otherState instanceof PrefixCodedTermState; + assert otherState.getClass() == this.state.getClass() : "Illegal TermState type " + otherState.getClass(); + assert ((PrefixCodedTermState)otherState).ord < numTerms; + setTermState(term, otherState); + positioned = false; + return SeekStatus.FOUND; + } + + @Override + public 
TermState termState() throws IOException { + final PrefixCodedTermState newTermState = (PrefixCodedTermState) state.clone(); + newTermState.filePointer = in.getFilePointer(); + return newTermState; + } + + @Override public SeekStatus seek(long ord) throws IOException { if (indexEnum == null) { @@ -562,7 +580,6 @@ return SeekStatus.FOUND; } - @Override public long ord() { if (!doOrd) { throw new UnsupportedOperationException(); Index: lucene/src/java/org/apache/lucene/index/codecs/TermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/TermState.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/TermState.java (working copy) @@ -1,56 +0,0 @@ -package org.apache.lucene.index.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.DocsEnum; // for javadocs - -import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs - -/** - * Holds all state required for {@link StandardPostingsReader} - * to produce a {@link DocsEnum} without re-seeking the - * terms dict. 
- * @lucene.experimental - */ - -public class TermState implements Cloneable { - public long ord; // ord for this term - public long filePointer; // fp into the terms dict primary file (_X.tis) - public int docFreq; // how many docs have this term - - public void copyFrom(TermState other) { - ord = other.ord; - filePointer = other.filePointer; - docFreq = other.docFreq; - } - - @Override - public Object clone() { - try { - return super.clone(); - } catch (CloneNotSupportedException cnse) { - // should not happen - throw new RuntimeException(cnse); - } - } - - @Override - public String toString() { - return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord; - } -} Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -33,6 +33,7 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.CompoundFileReader; @@ -742,11 +743,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - getTermsDict().cacheCurrentTerm(termEnum); - } - - @Override public SeekStatus seek(long ord) throws IOException { throw new UnsupportedOperationException(); } Index: lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (working copy) @@ -22,8 +22,9 @@ import 
org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -56,10 +57,10 @@ wrappedPostingsReader.init(termsIn); } - private static class PulsingTermState extends TermState { + private static class PulsingTermState extends PrefixCodedTermState { private byte[] postings; private int postingsSize; // -1 if this term was not inlined - private TermState wrappedTermState; + private PrefixCodedTermState wrappedTermState; private boolean pendingIndexTerm; @Override @@ -71,7 +72,7 @@ System.arraycopy(postings, 0, clone.postings, 0, postingsSize); } else { assert wrappedTermState != null; - clone.wrappedTermState = (TermState) wrappedTermState.clone(); + clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone(); } return clone; } @@ -102,15 +103,14 @@ } @Override - public TermState newTermState() throws IOException { + public PrefixCodedTermState newTermState() throws IOException { PulsingTermState state = new PulsingTermState(); state.wrappedTermState = wrappedPostingsReader.newTermState(); return state; } @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException { - + public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException { PulsingTermState termState = (PulsingTermState) _termState; termState.pendingIndexTerm |= isIndexTerm; @@ -137,7 +137,7 @@ // TODO: we could actually reuse, by having TL that // holds the last wrapped reuse, and vice-versa @Override - public DocsEnum docs(FieldInfo 
field, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { PulsingTermState termState = (PulsingTermState) _termState; if (termState.postingsSize != -1) { PulsingDocsEnum postings; @@ -162,7 +162,7 @@ // TODO: -- not great that we can't always reuse @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo field, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (field.omitTermFreqAndPositions) { return null; } Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -25,8 +25,9 @@ import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; @@ -129,12 +130,13 @@ } } - private static class SepTermState extends TermState { + private static final class SepTermState extends PrefixCodedTermState { // We store only the seek point to the docs file because // the rest of the info (freqIndex, posIndex, etc.) 
is // stored in the docs file: IntIndexInput.Index docIndex; - + + @Override public Object clone() { SepTermState other = (SepTermState) super.clone(); other.docIndex = (IntIndexInput.Index) docIndex.clone(); @@ -154,19 +156,19 @@ } @Override - public TermState newTermState() throws IOException { + public PrefixCodedTermState newTermState() throws IOException { final SepTermState state = new SepTermState(); state.docIndex = docIn.index(); return state; } @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException { + public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException { ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm); } @Override - public DocsEnum docs(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException { final SepTermState termState = (SepTermState) _termState; SepDocsEnum docsEnum; if (reuse == null || !(reuse instanceof SepDocsEnum)) { @@ -185,7 +187,7 @@ } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; final SepTermState termState = (SepTermState) _termState; SepDocsAndPositionsEnum postingsEnum; Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (revision 1058306) +++ 
lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -152,10 +153,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public BytesRef next() throws IOException { assert !ended; final BytesRefFSTEnum.InputOutput> result = fstEnum.next(); @@ -214,7 +211,7 @@ } return docsAndPositionsEnum.reset(docsStart, skipDocs); } - + @Override public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); @@ -439,7 +436,6 @@ } private class SimpleTextTerms extends Terms { - private final String field; private final long termsStart; private final boolean omitTF; private FST> fst; @@ -447,7 +443,6 @@ private final BytesRef scratch = new BytesRef(10); public SimpleTextTerms(String field, long termsStart) throws IOException { - this.field = StringHelper.intern(field); this.termsStart = termsStart; omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions; loadTerms(); Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (working copy) @@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import 
org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -83,20 +84,20 @@ } // Must keep final because we do non-standard clone - private final static class DocTermState extends TermState { + private final static class StandardTermState extends PrefixCodedTermState { long freqOffset; long proxOffset; int skipOffset; public Object clone() { - DocTermState other = new DocTermState(); + StandardTermState other = new StandardTermState(); other.copyFrom(this); return other; } public void copyFrom(TermState _other) { super.copyFrom(_other); - DocTermState other = (DocTermState) _other; + StandardTermState other = (StandardTermState) _other; freqOffset = other.freqOffset; proxOffset = other.proxOffset; skipOffset = other.skipOffset; @@ -108,8 +109,8 @@ } @Override - public TermState newTermState() { - return new DocTermState(); + public PrefixCodedTermState newTermState() { + return new StandardTermState(); } @Override @@ -126,10 +127,9 @@ } @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) + public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState termState, boolean isIndexTerm) throws IOException { - - final DocTermState docTermState = (DocTermState) termState; + final StandardTermState docTermState = (StandardTermState) termState; if (isIndexTerm) { docTermState.freqOffset = termsIn.readVLong(); @@ -153,7 +153,7 @@ } @Override - public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { SegmentDocsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { docsEnum = new SegmentDocsEnum(freqIn); @@ -166,11 +166,11 @@ docsEnum = new SegmentDocsEnum(freqIn); } } - 
return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { return null; } @@ -189,7 +189,7 @@ docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } else { SegmentDocsAndPositionsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) { @@ -203,7 +203,7 @@ docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } } @@ -233,7 +233,7 @@ this.freqIn = (IndexInput) freqIn.clone(); } - public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { omitTF = fieldInfo.omitTermFreqAndPositions; if (omitTF) { freq = 1; @@ -407,7 +407,7 @@ this.proxIn = (IndexInput) proxIn.clone(); } - public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert !fieldInfo.storePayloads; @@ -594,7 +594,7 @@ this.proxIn = (IndexInput) proxIn.clone(); } - public 
SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.storePayloads; if (payload == null) { Index: lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (working copy) @@ -21,9 +21,15 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -71,8 +77,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { - topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) { + topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD); } @Override @@ -98,9 +104,10 @@ final BytesRefHash pendingTerms = col.pendingTerms; final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); for(int i = 0; i < size; i++) { + final int pos = sort[i]; // docFreq is not used for constant score here, we pass 1 // to explicitely set a fake value, so 
it's not calculated - addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); + addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); } // Strip scores final Query result = new ConstantScoreQuery(bq); @@ -123,12 +130,21 @@ @Override public boolean collect(BytesRef bytes) throws IOException { - pendingTerms.add(bytes); + int pos = pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; } + + final TermState termState = termsEnum.termState(); + assert termState != null; + if (pos < 0) { + pos = (-pos)-1; + array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq()); + } else { + array.termState[pos] = new PerReaderTermState(topReaderContext, termState, readerContext.ord, termsEnum.docFreq()); + } return true; } @@ -137,7 +153,8 @@ TermsEnum termsEnum; final int docCountCutoff, termCountLimit; - final BytesRefHash pendingTerms = new BytesRefHash(); + final TermStateByteStart array = new TermStateByteStart(16); + final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); } @Override @@ -166,4 +183,40 @@ return true; } + + /** Special implementation of BytesStartArray that keeps parallel arrays for {@link PerReaderTermState} */ + static final class TermStateByteStart extends DirectBytesStartArray { + PerReaderTermState[] termState; + + public TermStateByteStart(int initSize) { + super(initSize); + } + + @Override + public int[] init() { + final int[] ord = super.init(); + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] grow() { + final int[] ord = super.grow(); + if (termState.length < ord.length) { + PerReaderTermState[] 
tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] clear() { + termState = null; + return super.clear(); + } + + } } Index: lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -21,6 +21,7 @@ import java.util.Comparator; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -155,12 +156,24 @@ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { return tenum.docsAndPositions(bits, reuse); } - + + /** This enum does not support seeking! 
+ * @throws UnsupportedOperationException + */ @Override - public void cacheCurrentTerm() throws IOException { - tenum.cacheCurrentTerm(); + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } - + + /** + * Returns the filtered enum's term state + */ + @Override + public TermState termState() throws IOException { + assert tenum != null; + return tenum.termState(); + } + @SuppressWarnings("fallthrough") @Override public BytesRef next() throws IOException { Index: lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -245,11 +246,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - actualEnum.cacheCurrentTerm(); - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return actualEnum.docs(skipDocs, reuse); } @@ -260,6 +256,15 @@ return actualEnum.docsAndPositions(skipDocs, reuse); } + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + return actualEnum.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return actualEnum.termState(); + } + @Override public Comparator getComparator() throws IOException { return actualEnum.getComparator(); Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== ---
lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.PerReaderTermState; /** * An abstract {@link Query} that matches documents @@ -159,8 +160,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -200,8 +201,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) { - final Query q = new ConstantScoreQuery(new TermQuery(term, docFreq)); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { + final Query q = new ConstantScoreQuery(new TermQuery(term, states)); q.setBoost(boost); topLevel.add(q, BooleanClause.Occur.SHOULD); } Index: lucene/src/java/org/apache/lucene/search/ScoringRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; @@ -27,6 +28,7 @@ import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; @@ -53,8 +55,9 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, + float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -114,13 +117,13 @@ final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); - final int[] docFreq = col.array.docFreq; final float[] boost = col.array.boost; + final PerReaderTermState[] termStates = col.array.termState; for (int i = 0; i < size; i++) { final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); - assert reader.docFreq(term) == docFreq[pos]; - addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); + assert reader.docFreq(term) == termStates[pos].docFreq(); + addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]); } } query.incTotalNumberOfTerms(size); @@ -143,15 +146,17 @@ @Override public boolean collect(BytesRef bytes) throws IOException { final int e = terms.add(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (e < 0 ) { // duplicate term: update docFreq final int pos = (-e)-1; - array.docFreq[pos] += termsEnum.docFreq(); + array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq()); assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; } else { // new entry: we populate the entry initially - array.docFreq[e] = termsEnum.docFreq(); array.boost[e] = boostAtt.getBoost(); + 
array.termState[e] = new PerReaderTermState(topReaderContext, state, readerContext.ord, termsEnum.docFreq()); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; @@ -160,8 +165,8 @@ /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ static final class TermFreqBoostByteStart extends DirectBytesStartArray { - int[] docFreq; float[] boost; + PerReaderTermState[] termState; public TermFreqBoostByteStart(int initSize) { super(initSize); @@ -171,24 +176,28 @@ public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; - docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; - assert boost.length >= ord.length && docFreq.length >= ord.length; + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] grow() { final int[] ord = super.grow(); - docFreq = ArrayUtil.grow(docFreq, ord.length); boost = ArrayUtil.grow(boost, ord.length); - assert boost.length >= ord.length && docFreq.length >= ord.length; + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] clear() { boost = null; - docFreq = null; + termState = null; return super.clear(); } Index: lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (revision 1058306) +++ 
lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (working copy) @@ -18,8 +18,6 @@ */ import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Comparator; import org.apache.lucene.index.Fields; @@ -27,25 +25,33 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ReaderUtil; abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { + /** Return a suitable top-level Query for holding all expanded terms. */ protected abstract Q getTopLevelQuery() throws IOException; /** Add a MultiTermQuery term to the top-level query */ - protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException { + addClause(topLevel, term, docCount, boost, null); + } + + protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState states) throws IOException; + protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final List subReaders = new ArrayList(); - ReaderUtil.gatherSubReaders(subReaders, reader); + ReaderContext topReaderContext = reader.getTopReaderContext(); Comparator lastTermComp = null; - - for (IndexReader r : subReaders) { - final Fields fields = r.fields(); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext context : leaves) { + final Fields fields = context.reader.fields(); if (fields == null) { // reader has no fields continue; @@ -68,11 +74,10 @@ 
if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp) throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp); lastTermComp = newTermComp; - + collector.setReaderContext(topReaderContext, context); collector.setNextEnum(termsEnum); BytesRef bytes; while ((bytes = termsEnum.next()) != null) { - termsEnum.cacheCurrentTerm(); if (!collector.collect(bytes)) return; // interrupt whole term collection, so also don't iterate other subReaders } @@ -80,6 +85,14 @@ } protected static abstract class TermCollector { + + protected AtomicReaderContext readerContext; + protected ReaderContext topReaderContext; + + public void setReaderContext(ReaderContext topReaderContext, AtomicReaderContext readerContext) { + this.readerContext = readerContext; + this.topReaderContext = topReaderContext; + } /** attributes used for communication with the enum */ public final AttributeSource attributes = new AttributeSource(); Index: lucene/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -22,10 +22,14 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. 
@@ -33,20 +37,22 @@ */ public class TermQuery extends Query { private final Term term; - private final int docFreq; + private int docFreq; + private transient PerReaderTermState perReaderTermState; private class TermWeight extends Weight { private final Similarity similarity; private float value; - private float idf; + private final float idf; private float queryNorm; private float queryWeight; - private IDFExplanation idfExp; - private transient ReaderContext weightContext; // only set if -ea for assert in scorer() + private final IDFExplanation idfExp; + private transient PerReaderTermState termStates; - public TermWeight(IndexSearcher searcher) + public TermWeight(IndexSearcher searcher, PerReaderTermState termStates, int docFreq) throws IOException { - assert setWeightContext(searcher); + assert termStates != null : "PerReaderTermState must not be null"; + this.termStates = termStates; this.similarity = getSimilarity(searcher); if (docFreq != -1) { idfExp = similarity.idfExplain(term, searcher, docFreq); @@ -80,31 +86,34 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - assert assertTopReaderContext(context); + final String field = term.field(); final IndexReader reader = context.reader; - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), - term.field(), - term.bytes()); - - if (docs == null) { + assert assertTopReaderContext(termStates, context) : "The top-reader used to create Weight is not the same as the current reader's top-reader"; + final TermState state = termStates + .get(context.ord); + if (state == null) { // term is not present in that reader + assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader"; return null; } - - return new TermScorer(this, docs, similarity, reader.norms(term.field())); + final DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), field, term.bytes(), state); + assert docs != null; + 
return new TermScorer(this, docs, similarity, context.reader.norms(field)); + } + + private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { + // only called from assert + final Terms terms = reader.terms(field); + return terms == null || terms.docFreq(bytes) == 0; } - private boolean assertTopReaderContext(ReaderContext context) { - while (context.parent != null) { + private boolean assertTopReaderContext(PerReaderTermState state, ReaderContext context) { + while(context.parent != null) { context = context.parent; } - return weightContext == context; + return state.topReaderContext == context; } - private boolean setWeightContext(IndexSearcher searcher) { - weightContext = searcher.getTopReaderContext(); - return true; - } - + @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { @@ -157,7 +166,7 @@ fieldExpl.addDetail(expl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + final byte[] fieldNorms = reader.norms(field); float fieldNorm = fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); @@ -193,6 +202,17 @@ public TermQuery(Term t, int docFreq) { term = t; this.docFreq = docFreq; + perReaderTermState = null; + } + + /** Expert: constructs a TermQuery that will use the + * provided term states, and the docFreq they carry, instead of + * looking the term up again against the searcher. */ + public TermQuery(Term t, PerReaderTermState states) { + assert states != null; + term = t; + docFreq = states.docFreq(); + perReaderTermState = states; } /** Returns the term of this query.
*/ @@ -200,7 +220,21 @@ @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new TermWeight(searcher); + final ReaderContext context = searcher.getTopReaderContext(); + final int weightDocFreq; + final PerReaderTermState termState; + if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { + // make TermQuery single-pass if we don't have a PRTS or if the context differs! + termState = PerReaderTermState.build(context, term, true); // cache term lookups! + // we must not ignore the given docFreq - if set use the given value + weightDocFreq = docFreq == -1 ? termState.docFreq() : docFreq; + } else { + // PRTS was pre-built for this IS + termState = this.perReaderTermState; + weightDocFreq = docFreq; + } + + return new TermWeight(searcher, termState, weightDocFreq); } @Override Index: lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (working copy) @@ -25,9 +25,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; /** * Base rewrite method for collecting only the top terms @@ -78,12 +80,12 @@ this.termComp = termsEnum.getComparator(); // lazy init the initial ScoreTerm because comparator is not known on ctor: if (st == null) - st = new ScoreTerm(this.termComp); + st = new ScoreTerm(this.termComp, new PerReaderTermState(topReaderContext)); boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); } @Override - public boolean collect(BytesRef bytes) { + public boolean collect(BytesRef bytes) throws IOException { final
float boost = boostAtt.getBoost(); // ignore uncompetetive hits if (stQueue.size() == maxSize) { @@ -94,23 +96,27 @@ return true; } ScoreTerm t = visitedTerms.get(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (t != null) { // if the term is already in the PQ, only update docFreq of term in PQ - t.docFreq += termsEnum.docFreq(); assert t.boost == boost : "boost should be equal in all segment TermsEnums"; + t.termState.register(state, readerContext.ord, termsEnum.docFreq()); } else { // add new entry in PQ, we must clone the term, else it may get overwritten! st.bytes.copy(bytes); st.boost = boost; - st.docFreq = termsEnum.docFreq(); visitedTerms.put(st.bytes, st); + assert st.termState.docFreq() == 0; + st.termState.register(state, readerContext.ord, termsEnum.docFreq()); stQueue.offer(st); // possibly drop entries from queue if (stQueue.size() > maxSize) { st = stQueue.poll(); visitedTerms.remove(st.bytes); + st.termState.clear(); // reset the termstate! 
} else { - st = new ScoreTerm(termComp); + st = new ScoreTerm(termComp, new PerReaderTermState(topReaderContext)); } assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; // set maxBoostAtt with values to help FuzzyTermsEnum to optimize @@ -120,6 +126,7 @@ maxBoostAtt.setCompetitiveTerm(t.bytes); } } + return true; } }); @@ -130,8 +137,8 @@ ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { final Term term = placeholderTerm.createTerm(st.bytes); - assert reader.docFreq(term) == st.docFreq; - addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query + assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq(); + addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return q; @@ -147,7 +154,7 @@ if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - final TopTermsRewrite other = (TopTermsRewrite) obj; + final TopTermsRewrite other = (TopTermsRewrite) obj; if (size != other.size) return false; return true; } @@ -163,13 +170,12 @@ static final class ScoreTerm implements Comparable { public final Comparator termComp; - public final BytesRef bytes = new BytesRef(); public float boost; - public int docFreq; - - public ScoreTerm(Comparator termComp) { + public final PerReaderTermState termState; + public ScoreTerm(Comparator termComp, PerReaderTermState termState) { this.termComp = termComp; + this.termState = termState; } public int compareTo(ScoreTerm other) { Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (revision 1058306) +++ 
lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (working copy) @@ -24,8 +24,11 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.util.ArrayUtil; @@ -304,11 +307,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override public BytesRef term() throws IOException { return term; } @@ -337,6 +335,19 @@ public Comparator getComparator() throws IOException { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert state != null && state instanceof OrdTermState; + return this.seek(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState state = new OrdTermState(); + state.ord = currentOrd; + return state; + } } } } Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 1058306) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.BooleanClause.Occur; // javadocs only +import org.apache.lucene.util.PerReaderTermState; /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, @@ -153,7 +154,7 @@ } @Override - 
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); @@ -202,7 +203,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); Index: lucene/src/java/org/apache/lucene/util/PerReaderTermState.java =================================================================== --- lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) @@ -0,0 +1,148 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.TermsEnum.SeekStatus; + +/** + * Maintains a {@link IndexReader} {@link TermState} view over + * {@link IndexReader} instances containing a single term. The + * {@link PerReaderTermState} doesn't track whether the given {@link TermState} + * objects are valid, nor whether the {@link TermState} instances refer to the + * same terms in the associated readers. + * + * @lucene.experimental + */ +public final class PerReaderTermState { + public final ReaderContext topReaderContext; // top-level context these states were built for + private final TermState[] states; + private int docFreq; + + /** + * Creates an empty {@link PerReaderTermState} from a {@link ReaderContext} + */ + public PerReaderTermState(ReaderContext context) { + assert context != null && context.isTopLevel; + topReaderContext = context; + docFreq = 0; + final int len; + if (context.leaves() == null) { + len = 1; + } else { + len = context.leaves().length; + } + states = new TermState[len]; + } + + /** + * Creates a {@link PerReaderTermState} with an initial {@link TermState}, + * {@link IndexReader} pair. + */ + public PerReaderTermState(ReaderContext context, TermState state, int ord, int docFreq) { + this(context); + register(state, ord, docFreq); + } + + /** + * Creates a {@link PerReaderTermState} from a top-level {@link ReaderContext} and the + * given {@link Term}. This method will look up the given term in all of the context's leaf readers + * and register each of the readers containing the term in the returned {@link PerReaderTermState} + * using the leaf reader's ordinal. + *

+ * Note: the given context must be a top-level context. + */ + public static PerReaderTermState build(ReaderContext context, Term term, boolean cache) + throws IOException { + assert context != null && context.isTopLevel; + final String field = term.field(); + final BytesRef bytes = term.bytes(); + final PerReaderTermState perReaderTermState = new PerReaderTermState(context); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); + for (int i = 0; i < leaves.length; i++) { + final Fields fields = leaves[i].reader.fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! + if (SeekStatus.FOUND == termsEnum.seek(bytes, cache)) { + final TermState termState = termsEnum.termState(); + perReaderTermState.register(termState, leaves[i].ord, termsEnum.docFreq()); + } + } + } + } + return perReaderTermState; + } + + /** + * Clears the {@link PerReaderTermState} internal state and removes all + * registered {@link TermState}s + */ + public void clear() { + docFreq = 0; + Arrays.fill(states, null); + } + + /** + * Registers and associates a {@link TermState} with a leaf ordinal. The leaf ordinal + * should be derived from a {@link ReaderContext}'s leaf ord. + */ + public void register(TermState state, final int ord, final int docFreq) { + assert state != null : "state must not be null"; + assert ord >= 0 && ord < states.length; + assert states[ord] == null : "state for ord: " + ord + + " already registered"; + this.docFreq += docFreq; + states[ord] = state; + } + + /** + * Returns the {@link TermState} for a leaf ordinal or null if no + * {@link TermState} for the ordinal was registered. + * + * @param ord + * the reader's leaf ordinal to get the {@link TermState} for.
+ * @return the {@link TermState} for the given reader's ord or null if no + * {@link TermState} for the reader was registered + */ + public TermState get(int ord) { + assert ord >= 0 && ord < states.length; + return states[ord]; + } + + /** + * Returns the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int, int)}. + * @return the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int, int)}. + */ + public int docFreq() { + return docFreq; + } +} \ No newline at end of file Property changes on: lucene/src/java/org/apache/lucene/util/PerReaderTermState.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- lucene/src/test/org/apache/lucene/TestExternalCodecs.java (revision 1058306) +++ lucene/src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.util.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.index.*; import org.apache.lucene.document.*; import org.apache.lucene.search.*; @@ -330,10 +331,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs); } Index: lucene/src/test/org/apache/lucene/search/QueryUtils.java =================================================================== --- lucene/src/test/org/apache/lucene/search/QueryUtils.java (revision 1058306) +++ lucene/src/test/org/apache/lucene/search/QueryUtils.java (working copy) @@ -212,8 +212,6 @@ throw e2; } } - - /** alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc * and ensure a hitcollector receives same docs and scores Index:
lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java =================================================================== --- lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (revision 1058306) +++ lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (working copy) @@ -40,6 +40,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.OrdTermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.CodecProvider; Index: solr/src/java/org/apache/solr/request/UnInvertedField.java =================================================================== --- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 1058306) +++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy) @@ -1000,10 +1000,6 @@ return tenum.docFreq(); } - @Override - public void cacheCurrentTerm() { - throw new UnsupportedOperationException(); - } public BytesRef skipTo(BytesRef target) throws IOException {