Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (revision 1050469) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (working copy) @@ -19,9 +19,13 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.TermState.MockTermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; + +import java.io.IOException; import java.util.Arrays; import java.util.Comparator; @@ -91,10 +95,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public BytesRef term() { return br; } @@ -129,5 +129,16 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public TermState termState() throws IOException { + return new MockTermState(ord(), docFreq()); + } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert state != null; + return seek(state.ord()); // just use the ord for simplicity + } } Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1050469) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Fields; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import 
org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.FieldsEnum; @@ -48,6 +49,7 @@ import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.index.TermVectorMapper; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -877,10 +879,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public long ord() { return termUpto; } @@ -910,6 +908,24 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert state != null; + return this.seek(state.ord()); + } + + @Override + public TermState termState() throws IOException { + return new TermState() { + @Override + public void copy(TermState other) { /**/ } + @Override + public int docFreq() { return 1; } + @Override + public long ord() { return termUpto; } + }; + } } private class MemoryDocsEnum extends DocsEnum { Index: lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java (revision 1050469) +++ lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java (working copy) @@ -112,7 +112,6 @@ docsReader, state.readBufferSize, BytesRef.getUTF8SortedAsUnicodeComparator(), - StandardCodec.TERMS_CACHE_SIZE, state.codecId); success = true; return ret; Index: lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java (revision 1050469) +++ 
lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java (working copy) @@ -25,6 +25,7 @@ import org.apache.lucene.index.codecs.PrefixCodedTermsReader; import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; import org.apache.lucene.index.codecs.TermsIndexReaderBase; +import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; @@ -35,9 +36,9 @@ public AppendingTermsDictReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize, - Comparator termComp, int termsCacheSize, String codecId) throws IOException { + Comparator termComp, String codecId) throws IOException { super(indexReader, dir, fieldInfos, segment, postingsReader, readBufferSize, - termComp, termsCacheSize, codecId); + termComp, StandardCodec.TERMS_CACHE_SIZE, codecId); } @Override Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -130,11 +130,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - in.cacheCurrentTerm(); - } - - @Override public SeekStatus seek(long ord) throws IOException { return in.seek(ord); } @@ -173,6 +168,16 @@ public Comparator getComparator() throws IOException { return in.getComparator(); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + return in.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return in.termState(); + } } /** Base class for filtering {@link DocsEnum} implementations. 
*/ Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -1070,6 +1070,49 @@ return null; } } + + /** + * Returns {@link DocsEnum} for the specified field, term and + * {@link TermState}. This may return null, if either the field or the term + * does not exist or the {@link TermState} is invalid for the underlying + * implementation.*/ + public DocsEnum termDocsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + assert term != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, term, state, null); + } else { + return null; + } + } + + /** + * Returns {@link DocsAndPositionsEnum} for the specified field, term and + * {@link TermState}. This may return null, if either the field or the term + * does not exist, the {@link TermState} is invalid for the underlying + * implementation, or positions were not stored for this term.*/ + public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + assert term != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, term, state, null); + } else { + return null; + } + } + /** Deletes the document numbered docNum. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations.
@@ -1126,7 +1169,7 @@ if (docs == null) return 0; int n = 0; int doc; - while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) { + while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) { deleteDocument(doc); n++; } Index: lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. */ +import org.apache.lucene.index.TermState.MockTermState; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Bits; @@ -91,13 +92,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - for(int i=0;i getComparator() { return termComp; } @@ -434,4 +428,15 @@ } } } + + @Override + public SeekStatus seek(BytesRef term, TermState state /*ignored*/) throws IOException { + assert term != null && state instanceof MockTermState; + return seek(term); + } + + @Override + public TermState termState() throws IOException { + return new MockTermState(-1, docFreq()); + } } Index: lucene/src/java/org/apache/lucene/index/TermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermState.java (revision 1050364) +++ lucene/src/java/org/apache/lucene/index/TermState.java (working copy) @@ -1,4 +1,4 @@ -package org.apache.lucene.index.codecs; +package org.apache.lucene.index; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -17,27 +17,35 @@ * limitations under the License. */ -import org.apache.lucene.index.DocsEnum; // for javadocs - -import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs - /** - * Holds all state required for {@link StandardPostingsReader} - * to produce a {@link DocsEnum} without re-seeking the - * terms dict. 
+ * Holds all state required for {@link TermsEnum} to produce a {@link DocsEnum} + * without re-seeking the terms dict. + * * @lucene.experimental */ +public abstract class TermState implements Cloneable { -public class TermState implements Cloneable { - public long ord; // ord for this term - public long filePointer; // fp into the terms dict primary file (_X.tis) - public int docFreq; // how many docs have this term + /** + * Copies the content of the given {@link TermState} to this instance + * + * @param other + * the TermState to copy + */ + public abstract void copy(TermState other); - public void copy(TermState other) { - ord = other.ord; - filePointer = other.filePointer; - docFreq = other.docFreq; - } + /** + * Returns ordinal of the term this {@link TermState} was created for or + * -1 if the ord is undefined. + */ + public abstract long ord(); + + /** + * Returns the document frequency of the term this {@link TermState} was + * created for in the creating {@link TermsEnum} + * @return the document frequency of the term this {@link TermState} was + * created for in the creating {@link TermsEnum} + */ + public abstract int docFreq(); @Override public Object clone() { @@ -49,8 +57,36 @@ } } - @Override - public String toString() { - return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord; + /** + * A {@link TermState} that only holds the ord and the docFreq for a term instead of + * additional information.
+ * @lucene.experimental + */ + public static final class MockTermState extends TermState { + + private int freq; + private long ord; + + public MockTermState(long ord, int docFreq) { + this.freq = docFreq; + this.ord = ord; + } + + @Override + public void copy(TermState other) { + final MockTermState state = (MockTermState) other; /* copy other's values into this instance */ + freq = state.freq; + ord = state.ord; + } + + @Override + public int docFreq() { + return freq; + } + + @Override + public long ord() { + return ord; + } } -} +} \ No newline at end of file Index: lucene/src/java/org/apache/lucene/index/Terms.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Terms.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/Terms.java (working copy) @@ -80,6 +80,39 @@ } } + /** + * Expert: Get {@link DocsEnum} for the specified term and {@link TermState}. + * This method may return null if the term does not exist. + * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsEnum docs(Bits skipDocs, BytesRef text, TermState termState, + DocsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(text, termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docs(skipDocs, reuse); + } else { + return null; + } + } + + /** + * Get {@link DocsAndPositionsEnum} for the specified term and {@link TermState}. This + * method may return null if the term does not exist, or positions were + * not indexed.
+ * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef text, + TermState termState, DocsAndPositionsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(text, termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docsAndPositions(skipDocs, reuse); + } else { + return null; + } + } + public long getUniqueTermCount() throws IOException { throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()"); } Index: lucene/src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsEnum.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -73,6 +73,20 @@ * may be before or after the current ord. See {@link * #seek(BytesRef)}. */ public abstract SeekStatus seek(long ord) throws IOException; + + /** + * Expert: Seeks to the specified term by {@link TermState} as previously + * returned by {@link #termState()}. The given {@link BytesRef} must hold the + * same text as used for positioning the TermsEnum before {@link #termState()} + * was called. Callers should maintain the {@link BytesRef} obtained from + * {@link #term()} together with the {@link TermState} to use this method. + * Low-level implementations will position the TermsEnum without re-seeking + * the term dictionary. Using this method with a {@link TermState} obtained + * from a different {@link Terms} instance will leave the {@link TermsEnum} in + * undefined state. + * */ + public abstract SeekStatus seek(BytesRef text, TermState state) throws IOException; + /** Increments the enumeration to the next element. 
* Returns the resulting term, or null if the end was @@ -98,7 +112,7 @@ * first time, after next() returns null or seek returns * {@link SeekStatus#END}.*/ public abstract int docFreq(); - + /** Get {@link DocsEnum} for the current term. Do not * call this before calling {@link #next} or {@link * #seek} for the first time. This method will not @@ -116,6 +130,15 @@ * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + /** + * Expert: Returns the TermsEnums internal state to position the TermsEnum + * without re-seeking the term dictionary. + * + * @see TermState + * @see #seek(BytesRef, TermState) + */ + public abstract TermState termState() throws IOException; + /** Return the {@link BytesRef} Comparator used to sort * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this @@ -123,10 +146,6 @@ * instance & reuse it. */ public abstract Comparator getComparator() throws IOException; - /** Optional optimization hint: informs the codec that the - * current term is likely to be re-seek'd-to soon. */ - public abstract void cacheCurrentTerm() throws IOException; - /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} *

Please note: This enum should be unmodifiable, @@ -142,9 +161,6 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override - public void cacheCurrentTerm() {} - - @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); } @@ -183,5 +199,15 @@ public synchronized AttributeSource attributes() { return super.attributes(); } + + @Override + public TermState termState() throws IOException { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; } Index: lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (working copy) @@ -42,17 +42,17 @@ public abstract void init(IndexInput termsIn) throws IOException; /** Return a newly created empty TermState */ - public abstract TermState newTermState() throws IOException; + public abstract TermStateBase newTermState() throws IOException; - public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState state, boolean isIndexTerm) throws IOException; + public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermStateBase state, boolean isIndexTerm) throws IOException; /** Must fully consume state, since after this call that * TermState may be reused. */ - public abstract DocsEnum docs(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsEnum reuse) throws IOException; + public abstract DocsEnum docs(FieldInfo fieldInfo, TermStateBase state, Bits skipDocs, DocsEnum reuse) throws IOException; /** Must fully consume state, since after this call that * TermState may be reused. 
*/ - public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermStateBase state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; public abstract void close() throws IOException; } Index: lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; @@ -68,7 +69,7 @@ private final Comparator termComp; // Caches the most recently looked-up field + terms: - private final DoubleBarrelLRUCache termsCache; + private final DoubleBarrelLRUCache termsCache; // Reads the terms index private TermsIndexReaderBase indexReader; @@ -84,11 +85,6 @@ public FieldAndTerm() { } - public FieldAndTerm(String field, BytesRef term) { - this.field = field; - this.term = new BytesRef(term); - } - public FieldAndTerm(FieldAndTerm other) { field = other.field; term = new BytesRef(other.term); @@ -116,7 +112,7 @@ throws IOException { this.postingsReader = postingsReader; - termsCache = new DoubleBarrelLRUCache(termsCacheSize); + termsCache = new DoubleBarrelLRUCache(termsCacheSize); this.termComp = termComp; @@ -282,10 +278,10 @@ } // Iterates through terms in this field - private class SegmentTermsEnum extends TermsEnum { + private final class SegmentTermsEnum extends TermsEnum { private final IndexInput in; private final 
DeltaBytesReader bytesReader; - private final TermState state; + private final TermStateBase state; private boolean seekPending; private final TermsIndexReaderBase.TermsIndexResult indexResult = new TermsIndexReaderBase.TermsIndexResult(); private final FieldAndTerm fieldTerm = new FieldAndTerm(); @@ -304,33 +300,20 @@ return termComp; } - @Override - public void cacheCurrentTerm() { - TermState stateCopy = (TermState) state.clone(); - stateCopy.filePointer = in.getFilePointer(); - termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term), - stateCopy); - } - /** Seeks until the first term that's >= the provided * text; returns SeekStatus.FOUND if the exact term * is found, SeekStatus.NOT_FOUND if a different term * was found, SeekStatus.END if we hit EOF */ @Override - public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { + public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException { // Check cache fieldTerm.term = term; - TermState cachedState; if (useCache) { - cachedState = termsCache.get(fieldTerm); + final TermStateBase cachedState = termsCache.get(fieldTerm); if (cachedState != null) { - state.copy(cachedState); - seekPending = true; - bytesReader.term.copy(term); + setTermState(term, cachedState); return SeekStatus.FOUND; } - } else { - cachedState = null; } boolean doSeek = true; @@ -387,16 +370,9 @@ while(next() != null) { final int cmp = termComp.compare(bytesReader.term, term); if (cmp == 0) { - if (doSeek && useCache) { - // Store in cache - FieldAndTerm entryKey = new FieldAndTerm(fieldTerm); - cachedState = (TermState) state.clone(); - // this is fp after current term - cachedState.filePointer = in.getFilePointer(); - termsCache.put(entryKey, cachedState); + cacheTerm(fieldTerm); } - return SeekStatus.FOUND; } else if (cmp > 0) { return SeekStatus.NOT_FOUND; @@ -412,6 +388,24 @@ return SeekStatus.END; } + private final void setTermState(final BytesRef term, final TermState termState) { + 
assert termState != null; + assert term != null; + state.copy(termState); + seekPending = true; + bytesReader.term.copy(term); + } + + private final void cacheTerm(FieldAndTerm other) { + // Store in cache + final FieldAndTerm entryKey = new FieldAndTerm(other); + final TermStateBase cachedState = (TermStateBase) state.clone(); + // this is fp after current term + cachedState.filePointer = in.getFilePointer(); + termsCache.put(entryKey, cachedState); + } + + @Override public SeekStatus seek(long ord) throws IOException { @@ -494,7 +488,7 @@ @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); assert docsEnum != null; return docsEnum; } @@ -507,6 +501,22 @@ return postingsReader.docsAndPositions(fieldInfo, state, skipDocs, reuse); } } + + @Override + public SeekStatus seek(BytesRef term, TermState otherState) throws IOException { + assert otherState != null; + assert otherState.getClass() == this.state.getClass(); + assert otherState.ord() < numTerms; + setTermState(term, otherState); + return SeekStatus.FOUND; + } + + @Override + public TermState termState() throws IOException { + final TermStateBase newTermState = (TermStateBase) state.clone(); + newTermState.filePointer = in.getFilePointer(); + return newTermState; + } } } } Index: lucene/src/java/org/apache/lucene/index/codecs/TermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/TermState.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/TermState.java (working copy) @@ -1,56 +0,0 @@ -package org.apache.lucene.index.codecs; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.DocsEnum; // for javadocs - -import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs - -/** - * Holds all state required for {@link StandardPostingsReader} - * to produce a {@link DocsEnum} without re-seeking the - * terms dict. - * @lucene.experimental - */ - -public class TermState implements Cloneable { - public long ord; // ord for this term - public long filePointer; // fp into the terms dict primary file (_X.tis) - public int docFreq; // how many docs have this term - - public void copy(TermState other) { - ord = other.ord; - filePointer = other.filePointer; - docFreq = other.docFreq; - } - - @Override - public Object clone() { - try { - return super.clone(); - } catch (CloneNotSupportedException cnse) { - // should not happen - throw new RuntimeException(cnse); - } - } - - @Override - public String toString() { - return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord; - } -} Index: lucene/src/java/org/apache/lucene/index/codecs/TermStateBase.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/TermStateBase.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/TermStateBase.java (revision 0) @@ -0,0 +1,60 @@ 
+package org.apache.lucene.index.codecs; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.DocsEnum; // for javadocs +import org.apache.lucene.index.TermState; + +import org.apache.lucene.index.codecs.standard.StandardPostingsReader; // javadocs + +/** + * Holds all state required for {@link StandardPostingsReader} + * to produce a {@link DocsEnum} without re-seeking the + * terms dict. 
+ * @lucene.experimental + */ +public class TermStateBase extends TermState { + public long ord; // ord for this term + public long filePointer; // fp into the terms dict primary file (_X.tis) + public int docFreq; // how many docs have this term + + @Override + public void copy(TermState other) { + final TermStateBase state = (TermStateBase) other; + ord = state.ord; + filePointer = state.filePointer; + docFreq = state.docFreq; + } + + + @Override + public String toString() { + return "tis.fp=" + filePointer + " docFreq=" + docFreq + " ord=" + ord; + } + + @Override + public int docFreq() { + return docFreq; + } + + @Override + public long ord() { + return ord; + } + +} Property changes on: lucene/src/java/org/apache/lucene/index/codecs/TermStateBase.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (working copy) @@ -33,9 +33,11 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.CompoundFileReader; +import org.apache.lucene.index.TermState.MockTermState; import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -742,11 +744,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - getTermsDict().cacheCurrentTerm(termEnum); - } - - @Override public SeekStatus seek(long ord) throws IOException { throw new 
UnsupportedOperationException(); } @@ -971,6 +968,17 @@ } return docsPosEnum.reset(termEnum, skipDocs); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert term != null; + return seek(term); + } + + @Override + public TermState termState() throws IOException { + return new MockTermState(-1, termEnum.docFreq()); + } } private final class PreDocsEnum extends DocsEnum { Index: lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (working copy) @@ -22,7 +22,8 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.codecs.TermStateBase; import org.apache.lucene.index.codecs.PostingsReaderBase; import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Document; import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Position; @@ -57,14 +58,14 @@ wrappedPostingsReader.init(termsIn); } - private static class PulsingTermState extends TermState { + private static final class PulsingTermState extends TermStateBase { private Document docs[]; - private TermState wrappedTermState; + private TermStateBase wrappedTermState; private boolean pendingIndexTerm; + @Override public Object clone() { - PulsingTermState clone; - clone = (PulsingTermState) super.clone(); + final PulsingTermState clone = (PulsingTermState) super.clone(); clone.docs = docs.clone(); for(int i=0;i> result = fstEnum.next(); @@ -215,6 +213,16 @@ } return docsAndPositionsEnum.reset(docsStart, skipDocs); } + + public SeekStatus 
seek(BytesRef term, TermState state) throws IOException { + assert term != null; + return seek(term); + } + + @Override + public TermState termState() throws IOException { + return new MockTermState(-1, docFreq()); + } @Override public Comparator getComparator() { @@ -440,7 +448,6 @@ } private class SimpleTextTerms extends Terms { - private final String field; private final long termsStart; private final boolean omitTF; private FST> fst; @@ -448,7 +455,6 @@ private final BytesRef scratch = new BytesRef(10); public SimpleTextTerms(String field, long termsStart) throws IOException { - this.field = StringHelper.intern(field); this.termsStart = termsStart; omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions; loadTerms(); Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (working copy) @@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.codecs.TermStateBase; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -83,20 +84,22 @@ } // Must keep final because we do non-standard clone - private final static class DocTermState extends TermState { + private final static class StandardTermState extends TermStateBase { long freqOffset; long proxOffset; int skipOffset; - + + @Override public Object clone() { - DocTermState other = new DocTermState(); + StandardTermState other = new StandardTermState(); other.copy(this); 
return other; } + @Override public void copy(TermState _other) { super.copy(_other); - DocTermState other = (DocTermState) _other; + StandardTermState other = (StandardTermState) _other; freqOffset = other.freqOffset; proxOffset = other.proxOffset; skipOffset = other.skipOffset; @@ -108,8 +111,8 @@ } @Override - public TermState newTermState() { - return new DocTermState(); + public TermStateBase newTermState() { + return new StandardTermState(); } @Override @@ -126,10 +129,9 @@ } @Override - public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) + public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermStateBase termState, boolean isIndexTerm) throws IOException { - - final DocTermState docTermState = (DocTermState) termState; + final StandardTermState docTermState = (StandardTermState) termState; if (isIndexTerm) { docTermState.freqOffset = termsIn.readVLong(); @@ -153,7 +155,7 @@ } @Override - public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { + public DocsEnum docs(FieldInfo fieldInfo, TermStateBase termState, Bits skipDocs, DocsEnum reuse) throws IOException { SegmentDocsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { docsEnum = new SegmentDocsEnum(freqIn); @@ -166,11 +168,11 @@ docsEnum = new SegmentDocsEnum(freqIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } @Override - public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { + public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, TermStateBase termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.omitTermFreqAndPositions) { return null; } @@ -189,7 +191,7 @@ docsEnum = new 
SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } else { SegmentDocsAndPositionsEnum docsEnum; if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) { @@ -203,7 +205,7 @@ docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); } } - return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); } } @@ -233,7 +235,7 @@ this.freqIn = (IndexInput) freqIn.clone(); } - public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { omitTF = fieldInfo.omitTermFreqAndPositions; if (omitTF) { freq = 1; @@ -407,7 +409,7 @@ this.proxIn = (IndexInput) proxIn.clone(); } - public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert !fieldInfo.storePayloads; @@ -594,7 +596,7 @@ this.proxIn = (IndexInput) proxIn.clone(); } - public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException { + public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { assert !fieldInfo.omitTermFreqAndPositions; assert fieldInfo.storePayloads; if (payload == null) { Index: lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 
1050469) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (working copy) @@ -21,9 +21,15 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -71,8 +77,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { - topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) { + topLevel.add(new TermQuery(term, docFreq, states), BooleanClause.Occur.SHOULD); } @Override @@ -98,9 +104,10 @@ final BytesRefHash pendingTerms = col.pendingTerms; final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); for(int i = 0; i < size; i++) { + final int pos = sort[i]; // docFreq is not used for constant score here, we pass 1 // to explicitely set a fake value, so it's not calculated - addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); + addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); } // Strip scores final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); @@ -123,12 +130,21 @@ @Override public boolean collect(BytesRef bytes) throws IOException { - pendingTerms.add(bytes); + int pos = pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); if (pendingTerms.size() >= termCountLimit || docVisitCount >= 
docCountCutoff) { hasCutOff = true; return false; } + + final TermState termState = termsEnum.termState(); + assert termState != null; + if (pos < 0) { + pos = (-pos)-1; + array.termState[pos].register(termState, reader); + } else { + array.termState[pos] = new PerReaderTermState(termState, reader); + } return true; } @@ -137,7 +153,8 @@ TermsEnum termsEnum; final int docCountCutoff, termCountLimit; - final BytesRefHash pendingTerms = new BytesRefHash(); + final TermStateByteStart array = new TermStateByteStart(16); + final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); } @Override @@ -166,4 +183,40 @@ return true; } + + /** Special implementation of BytesStartArray that keeps a parallel array of PerReaderTermState */ + static final class TermStateByteStart extends DirectBytesStartArray { + PerReaderTermState[] termState; + + public TermStateByteStart(int initSize) { + super(initSize); + } + + @Override + public int[] init() { + final int[] ord = super.init(); + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] grow() { + final int[] ord = super.grow(); + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] clear() { + termState = null; + return super.clear(); + } + + } } Index: lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 1050469) +++ 
lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -21,6 +21,7 @@ import java.util.Comparator; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -155,12 +156,18 @@ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { return tenum.docsAndPositions(bits, reuse); } - @Override - public void cacheCurrentTerm() throws IOException { - tenum.cacheCurrentTerm(); + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert tenum != null; + return tenum.seek(term, state); } - + + @Override + public TermState termState() throws IOException { + assert tenum != null; + return tenum.termState(); + } + @SuppressWarnings("fallthrough") @Override public BytesRef next() throws IOException { Index: lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -245,11 +246,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - actualEnum.cacheCurrentTerm(); - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return actualEnum.docs(skipDocs, reuse); } @@ -260,6 +256,15 @@ return actualEnum.docsAndPositions(skipDocs, reuse); } + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + return 
actualEnum.seek(term, state); + } + + @Override + public TermState termState() throws IOException { + return actualEnum.termState(); + } + @Override public Comparator getComparator() throws IOException { return actualEnum.getComparator(); Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.PerReaderTermState; /** * An abstract {@link Query} that matches documents @@ -159,8 +160,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, docCount, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -200,8 +201,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) { - final Query q = new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term, docFreq))); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { + final Query q = new ConstantScoreQuery(new QueryWrapperFilter(new TermQuery(term, docFreq, states))); q.setBoost(boost); topLevel.add(q, BooleanClause.Occur.SHOULD); } Index: lucene/src/java/org/apache/lucene/search/ScoringRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 1050469) +++ 
lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (working copy) @@ -18,24 +18,18 @@ */ import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.PriorityQueue; -import java.util.Comparator; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; @@ -62,11 +56,12 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, + float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, docCount, states); tq.setBoost(boost); - topLevel.add(tq, BooleanClause.Occur.SHOULD); - } + topLevel.add(tq, BooleanClause.Occur.SHOULD); + } @Override protected void checkMaxClauseCount(int count) { @@ -77,7 +72,7 @@ // Make sure we are still a singleton even after deserializing protected Object readResolve() { return SCORING_BOOLEAN_QUERY_REWRITE; - } + } }; /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except @@ -123,13 +118,13 @@ final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); - final int[] docFreq = col.array.docFreq; final float[] boost = col.array.boost; + final PerReaderTermState[] termStates = col.array.termState; for (int i = 0; i < 
size; i++) { final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); - assert reader.docFreq(term) == docFreq[pos]; - addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); + assert reader.docFreq(term) == termStates[pos].docFreq(); + addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]); } } query.incTotalNumberOfTerms(size); @@ -152,15 +147,17 @@ @Override public boolean collect(BytesRef bytes) throws IOException { final int e = terms.add(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (e < 0 ) { // duplicate term: update docFreq final int pos = (-e)-1; - array.docFreq[pos] += termsEnum.docFreq(); + array.termState[pos].register(state, reader); assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; } else { // new entry: we populate the entry initially - array.docFreq[e] = termsEnum.docFreq(); array.boost[e] = boostAtt.getBoost(); + array.termState[e] = new PerReaderTermState(state, reader); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; @@ -169,8 +166,8 @@ /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ static final class TermFreqBoostByteStart extends DirectBytesStartArray { - int[] docFreq; float[] boost; + PerReaderTermState[] termState; public TermFreqBoostByteStart(int initSize) { super(initSize); @@ -180,24 +177,28 @@ public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; - docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; - assert boost.length >= ord.length && docFreq.length >= ord.length; + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length && boost.length >= ord.length; return ord; 
} @Override public int[] grow() { final int[] ord = super.grow(); - docFreq = ArrayUtil.grow(docFreq, ord.length); boost = ArrayUtil.grow(boost, ord.length); - assert boost.length >= ord.length && docFreq.length >= ord.length; + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] clear() { boost = null; - docFreq = null; + termState = null; return super.clear(); } Index: lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import java.io.Serializable; import java.util.ArrayList; import java.util.List; import java.util.Comparator; @@ -30,6 +29,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ReaderUtil; abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { @@ -38,7 +38,12 @@ protected abstract Q getTopLevelQuery() throws IOException; /** Add a MultiTermQuery term to the top-level query */ - protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException { + addClause(topLevel, term, docCount, boost, null); + } + + protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState 
states) throws IOException; + protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { final List subReaders = new ArrayList(); @@ -71,9 +76,9 @@ lastTermComp = newTermComp; collector.setNextEnum(termsEnum); + collector.setNextReader(r); BytesRef bytes; while ((bytes = termsEnum.next()) != null) { - termsEnum.cacheCurrentTerm(); if (!collector.collect(bytes)) return; // interrupt whole term collection, so also don't iterate other subReaders } @@ -81,6 +86,8 @@ } protected static abstract class TermCollector { + protected IndexReader reader; + /** attributes used for communication with the enum */ public final AttributeSource attributes = new AttributeSource(); @@ -89,5 +96,9 @@ /** the next segment's {@link TermsEnum} that is used to collect terms */ public abstract void setNextEnum(TermsEnum termsEnum) throws IOException; + + public void setNextReader(IndexReader reader) { + this.reader = reader; + } } } Index: lucene/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -21,9 +21,13 @@ import java.util.Set; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. 
@@ -31,7 +35,8 @@ */ public class TermQuery extends Query { private final Term term; - private final int docFreq; + private int docFreq; + private transient PerReaderTermState perReaderTermState; private class TermWeight extends Weight { private final Similarity similarity; @@ -40,10 +45,12 @@ private float queryNorm; private float queryWeight; private IDFExplanation idfExp; + private final transient PerReaderTermState termStates; - public TermWeight(Searcher searcher) + public TermWeight(Searcher searcher, final PerReaderTermState termStates) throws IOException { this.similarity = getSimilarity(searcher); + this.termStates = termStates; if (docFreq != -1) { idfExp = similarity.idfExplain(term, searcher, docFreq); } else { @@ -76,17 +83,24 @@ @Override public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), - term.field(), - term.bytes()); - + final DocsEnum docs; + final TermState state; + if(termStates != null) { + if ((state = termStates.take(reader)) != null) { + docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), term.bytes(), state); + } else { + return null; + } + } else { + docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field(), term.bytes()); + } if (docs == null) { return null; } - return new TermScorer(this, docs, similarity, reader.norms(term.field())); } - + + @Override public Explanation explain(IndexReader reader, int doc) throws IOException { @@ -172,8 +186,16 @@ * provided docFreq instead of looking up the docFreq * against the searcher. */ public TermQuery(Term t, int docFreq) { + this(t, docFreq, null); + } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. 
*/ + public TermQuery(Term t, int docFreq, PerReaderTermState states) { term = t; this.docFreq = docFreq; + this.perReaderTermState = states; } /** Returns the term of this query. */ @@ -181,7 +203,9 @@ @Override public Weight createWeight(Searcher searcher) throws IOException { - return new TermWeight(searcher); + final PerReaderTermState termState = this.perReaderTermState; + this.perReaderTermState = null; // don't hold on to this + return new TermWeight(searcher, termState); } @Override Index: lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (working copy) @@ -25,9 +25,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; /** * Base rewrite method for collecting only the top terms @@ -83,7 +85,7 @@ } @Override - public boolean collect(BytesRef bytes) { + public boolean collect(BytesRef bytes) throws IOException { final float boost = boostAtt.getBoost(); // ignore uncompetetive hits if (stQueue.size() == maxSize) { @@ -94,21 +96,25 @@ return true; } ScoreTerm t = visitedTerms.get(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (t != null) { // if the term is already in the PQ, only update docFreq of term in PQ - t.docFreq += termsEnum.docFreq(); assert t.boost == boost : "boost should be equal in all segment TermsEnums"; + t.termState.register(state, reader); } else { // add new entry in PQ, we must clone the term, else it may get overwritten! 
st.bytes.copy(bytes); st.boost = boost; - st.docFreq = termsEnum.docFreq(); visitedTerms.put(st.bytes, st); + assert st.termState.size() == 0; + st.termState.register(state, reader); stQueue.offer(st); // possibly drop entries from queue if (stQueue.size() > maxSize) { st = stQueue.poll(); visitedTerms.remove(st.bytes); + st.termState.clear(); // reset the termstate! } else { st = new ScoreTerm(termComp); } @@ -120,6 +126,7 @@ maxBoostAtt.setCompetitiveTerm(t.bytes); } } + return true; } }); @@ -130,8 +137,8 @@ ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { final Term term = placeholderTerm.createTerm(st.bytes); - assert reader.docFreq(term) == st.docFreq; - addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query + assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq(); + addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return q; @@ -147,7 +154,7 @@ if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - final TopTermsRewrite other = (TopTermsRewrite) obj; + final TopTermsRewrite other = (TopTermsRewrite) obj; if (size != other.size) return false; return true; } @@ -163,11 +170,9 @@ static final class ScoreTerm implements Comparable { public final Comparator termComp; - public final BytesRef bytes = new BytesRef(); public float boost; - public int docFreq; - + public final PerReaderTermState termState = new PerReaderTermState(); public ScoreTerm(Comparator termComp) { this.termComp = termComp; } Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (revision 1050469) +++ 
lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; @@ -284,11 +285,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override public BytesRef term() throws IOException { return term; } @@ -317,6 +313,24 @@ public Comparator getComparator() throws IOException { throw new UnsupportedOperationException(); } + + @Override + public SeekStatus seek(BytesRef term /*ignored*/, TermState state) throws IOException { + assert state != null; + return seek(state.ord()); + } + + @Override + public TermState termState() throws IOException { + return new TermState() { + @Override + public void copy(TermState other) { /**/ } + @Override + public long ord() { return currentOrd; } + @Override + public int docFreq() { return 0; } // TODO return 1 or throw unsupported operation? 
+ }; + } } } } Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 1050469) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.BooleanClause.Occur; // javadocs only +import org.apache.lucene.util.PerReaderTermState; /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, @@ -153,7 +154,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); @@ -202,7 +203,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); Index: lucene/src/java/org/apache/lucene/util/PerReaderTermState.java =================================================================== --- lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) @@ -0,0 +1,156 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.IdentityHashMap; +import java.util.List; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; + +/** + * Maintains a {@link IndexReader} {@link TermState} view over + * {@link IndexReader} instances containing a single term. The + * {@link PerReaderTermState} doesn't track if the given {@link TermState} + * objects are valid, neither if the {@link TermState} instances refer to the + * same terms in the associated readers. 
+ * + * @lucene.experimental + */ +public final class PerReaderTermState { + + private final IdentityHashMap stateMapping = new IdentityHashMap(); + private int docFreq; + + /** + * Creates an empty {@link PerReaderTermState} + */ + public PerReaderTermState() { + docFreq = 0; + } + + /** + * Creates a {@link PerReaderTermState} with an initial {@link TermState}, + * {@link IndexReader} pair + */ + public PerReaderTermState(TermState state, IndexReader reader) { + this(); + register(state, reader); + } + + /** + * Creates a {@link PerReaderTermState} from an {@link IndexReader} and the + * given {@link Term}. This method will gather all of the {@link IndexReader}'s + * sub-readers and register each of the sub-readers in the + * {@link PerReaderTermState} if the term is present in the sub-reader. + */ + public static PerReaderTermState build(IndexReader topLevel, Term term) + throws IOException { + final PerReaderTermState perReaderTermState = new PerReaderTermState(); + final List allSubReaders = new ArrayList(); + final String field = term.field(); + final BytesRef bytes = term.bytes(); + ReaderUtil.gatherSubReaders(allSubReaders, topLevel); + for (IndexReader indexReader : allSubReaders) { + final Fields fields = indexReader.fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + if (SeekStatus.FOUND == termsEnum.seek(bytes, false)) { // no cache + // here! + final TermState termState = termsEnum.termState(); + perReaderTermState.register(termState, indexReader); + } + } + } + } + return perReaderTermState; + } + + public void clear() { + docFreq = 0; + stateMapping.clear(); + } + + /** + * Registers and associates a {@link TermState} with an {@link IndexReader}. 
+ */ + public void register(TermState state, IndexReader reader) { + docFreq += state.docFreq(); + stateMapping.put(reader, state); + } + + /** + * Removes and returns the {@link TermState} registered for the given + * {@link IndexReader} + */ + public TermState take(IndexReader reader) { + return stateMapping.remove(reader); + } + + /** + * + * @return the number of registered {@link TermState} {@link IndexReader} + * pairs + */ + public int size() { + return stateMapping.size(); + } + + /** + * @return the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, IndexReader)}. + */ + public int docFreq() { + return docFreq; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + + ((stateMapping == null) ? 0 : stateMapping.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PerReaderTermState other = (PerReaderTermState) obj; + if (stateMapping == null) { + if (other.stateMapping != null) + return false; + } else if (!stateMapping.equals(other.stateMapping)) + return false; + return true; + } +} Property changes on: lucene/src/java/org/apache/lucene/util/PerReaderTermState.java ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Date Author Id Revision HeadURL Index: lucene/src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- lucene/src/test/org/apache/lucene/TestExternalCodecs.java (revision 1050469) +++ lucene/src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -18,10 +18,12 @@ */ import org.apache.lucene.util.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.index.*; import org.apache.lucene.document.*; import 
org.apache.lucene.search.*; import org.apache.lucene.analysis.*; +import org.apache.lucene.index.TermState.MockTermState; import org.apache.lucene.index.codecs.*; import org.apache.lucene.index.codecs.standard.*; import org.apache.lucene.index.codecs.pulsing.*; @@ -330,10 +332,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs); } @@ -342,6 +340,17 @@ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) { return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), skipDocs); } + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + assert term != null; + return seek(term); + } + + @Override + public TermState termState() throws IOException { + return new MockTermState(-1, docFreq()); + } } private static class RAMDocsEnum extends DocsEnum { Index: lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java (revision 1050469) +++ lucene/src/test/org/apache/lucene/search/TestMultiSearcher.java (working copy) @@ -31,6 +31,7 @@ import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.Directory; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -308,11 +309,10 @@ hits=indexSearcher1.search(query, null, 1000).scoreDocs; assertEquals(message, 2, hits.length); - // Store the scores for use later float[] scores={ hits[0].score, hits[1].score }; - assertTrue(message, scores[0] > scores[1]); + assertTrue(message + " " + Arrays.toString(scores), scores[0] > scores[1]); indexSearcher1.close(); ramDirectory1.close(); Index: solr/src/java/org/apache/solr/request/UnInvertedField.java 
=================================================================== --- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 1050469) +++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; @@ -1001,10 +1002,6 @@ return tenum.docFreq(); } - @Override - public void cacheCurrentTerm() { - throw new UnsupportedOperationException(); - } public BytesRef skipTo(BytesRef target) throws IOException { @@ -1114,6 +1111,18 @@ public SeekStatus seek(BytesRef target, boolean useCache) { throw new UnsupportedOperationException(); } + + + @Override + public SeekStatus seek(BytesRef term, TermState state) throws IOException { + throw new UnsupportedOperationException(); + } + + + @Override + public TermState termState() throws IOException { + throw new UnsupportedOperationException(); + } }