Index: solr/src/java/org/apache/solr/request/UnInvertedField.java =================================================================== --- solr/src/java/org/apache/solr/request/UnInvertedField.java (revision 1057163) +++ solr/src/java/org/apache/solr/request/UnInvertedField.java (working copy) @@ -1000,10 +1000,6 @@ return tenum.docFreq(); } - @Override - public void cacheCurrentTerm() { - throw new UnsupportedOperationException(); - } public BytesRef skipTo(BytesRef target) throws IOException { Index: lucene/src/test/org/apache/lucene/TestExternalCodecs.java =================================================================== --- lucene/src/test/org/apache/lucene/TestExternalCodecs.java (revision 1057163) +++ lucene/src/test/org/apache/lucene/TestExternalCodecs.java (working copy) @@ -18,6 +18,7 @@ */ import org.apache.lucene.util.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.index.*; import org.apache.lucene.document.*; import org.apache.lucene.search.*; @@ -330,10 +331,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) { return new RAMDocsEnum(ramField.termToDocs.get(current), skipDocs); } Index: lucene/src/test/org/apache/lucene/search/QueryUtils.java =================================================================== --- lucene/src/test/org/apache/lucene/search/QueryUtils.java (revision 1057163) +++ lucene/src/test/org/apache/lucene/search/QueryUtils.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.ReaderUtil; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; @@ -213,21 +214,12 @@ } } - private static AtomicReaderContext[] getLeaves(IndexSearcher searcher) { - ReaderContext topLevelReaderContext = searcher.getTopReaderContext(); - if (topLevelReaderContext.isAtomic) { - return new 
AtomicReaderContext[] {(AtomicReaderContext) topLevelReaderContext}; - } else { - return topLevelReaderContext.leaves(); - } - } - /** alternate scorer skipTo(),skipTo(),next(),next(),skipTo(),skipTo(), etc * and ensure a hitcollector receives same docs and scores */ public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException { //System.out.println("Checking "+q); - final AtomicReaderContext[] context = getLeaves(s); + final AtomicReaderContext[] context = ReaderUtil.leaves(s.getTopReaderContext()); if (q.weight(s).scoresDocsOutOfOrder()) return; // in this case order of skipTo() might differ from that of next(). final int skip_op = 0; @@ -357,7 +349,7 @@ final float maxDiff = 1e-3f; final int lastDoc[] = {-1}; final IndexReader lastReader[] = {null}; - final ReaderContext[] context = getLeaves(s); + final ReaderContext[] context = ReaderUtil.leaves(s.getTopReaderContext()); s.search(q,new Collector() { private Scorer scorer; private int leafPtr; Index: lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java =================================================================== --- lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (revision 1057163) +++ lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (working copy) @@ -43,6 +43,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.IndexInput; @@ -978,11 +979,7 @@ int ord = 0; while((term = termsEnum.next()) != null) { if (ord == 0) { - try { - termsEnum.ord(); - } catch (UnsupportedOperationException uoe) { - storeOrd = false; - } + storeOrd = termsEnum.termState() instanceof PrefixCodedTermState; } final int output; if (storeOrd) { @@ -1058,7 +1055,7 @@ 
assertEquals(termsEnum.term().utf8ToString() + " != " + fstEnum.current().input.utf8ToString(), termsEnum.term(), fstEnum.current().input); if (storeOrd) { // fst stored the ord - assertEquals(termsEnum.ord(), ((Long) fstEnum.current().output).longValue()); + assertEquals(((PrefixCodedTermState)termsEnum.termState()).ord(), ((Long) fstEnum.current().output).longValue()); } else { // fst stored the docFreq assertEquals(termsEnum.docFreq(), (int) (((Long) fstEnum.current().output).longValue())); Index: lucene/src/java/org/apache/lucene/search/ScoringRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/ScoringRewrite.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.MultiTermQuery.RewriteMethod; @@ -27,6 +28,7 @@ import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; @@ -53,8 +55,9 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, + float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -114,13 +117,13 @@ final int size = col.terms.size(); if (size > 0) { final int sort[] = col.terms.sort(col.termsEnum.getComparator()); - final int[] docFreq = col.array.docFreq; final float[] 
boost = col.array.boost; + final PerReaderTermState[] termStates = col.array.termState; for (int i = 0; i < size; i++) { final int pos = sort[i]; final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); - assert reader.docFreq(term) == docFreq[pos]; - addClause(result, term, docFreq[pos], query.getBoost() * boost[pos]); + assert reader.docFreq(term) == termStates[pos].docFreq(); + addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]); } } query.incTotalNumberOfTerms(size); @@ -143,15 +146,17 @@ @Override public boolean collect(BytesRef bytes) throws IOException { final int e = terms.add(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (e < 0 ) { // duplicate term: update docFreq final int pos = (-e)-1; - array.docFreq[pos] += termsEnum.docFreq(); + array.termState[pos].register(state, readerContext.ord); assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; } else { // new entry: we populate the entry initially - array.docFreq[e] = termsEnum.docFreq(); array.boost[e] = boostAtt.getBoost(); + array.termState[e] = new PerReaderTermState(topReaderContext, state, readerContext.ord); ScoringRewrite.this.checkMaxClauseCount(terms.size()); } return true; @@ -160,8 +165,8 @@ /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ static final class TermFreqBoostByteStart extends DirectBytesStartArray { - int[] docFreq; float[] boost; + PerReaderTermState[] termState; public TermFreqBoostByteStart(int initSize) { super(initSize); @@ -171,24 +176,28 @@ public int[] init() { final int[] ord = super.init(); boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; - docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; - assert boost.length >= ord.length && docFreq.length >= ord.length; + termState = new 
PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] grow() { final int[] ord = super.grow(); - docFreq = ArrayUtil.grow(docFreq, ord.length); boost = ArrayUtil.grow(boost, ord.length); - assert boost.length >= ord.length && docFreq.length >= ord.length; + if (termState.length < ord.length) { + PerReaderTermState[] tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length && boost.length >= ord.length; return ord; } @Override public int[] clear() { boost = null; - docFreq = null; + termState = null; return super.clear(); } Index: lucene/src/java/org/apache/lucene/search/MultiTermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.PerReaderTermState; /** * An abstract {@link Query} that matches documents @@ -159,8 +160,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost) { - final TermQuery tq = new TermQuery(term, docCount); + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { + final TermQuery tq = new TermQuery(term, states); tq.setBoost(boost); topLevel.add(tq, BooleanClause.Occur.SHOULD); } @@ -200,8 +201,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost) { - final Query q = new 
ConstantScoreQuery(new TermQuery(term, docFreq)); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { + final Query q = new ConstantScoreQuery(new TermQuery(term, states)); q.setBoost(boost); topLevel.add(q, BooleanClause.Occur.SHOULD); } Index: lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java (working copy) @@ -24,8 +24,10 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.util.ArrayUtil; @@ -304,11 +306,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - throw new UnsupportedOperationException(); - } - - @Override public BytesRef term() throws IOException { return term; } @@ -337,6 +334,24 @@ public Comparator getComparator() throws IOException { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public SeekStatus seek(TermState state) throws IOException { + assert state != null && state instanceof PrefixCodedTermState; + return this.seek(((PrefixCodedTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + PrefixCodedTermState state = new PrefixCodedTermState() { + @Override + public int docFreq() { + throw new UnsupportedOperationException(); + } + }; + state.ord = currentOrd; + return state; + } } } } Index: lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java 
=================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java (working copy) @@ -21,6 +21,7 @@ import java.util.Comparator; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -155,12 +156,24 @@ public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { return tenum.docsAndPositions(bits, reuse); } - + + /** This enum does not support seeking! + * @throws UnsupportedOperationException + */ @Override - public void cacheCurrentTerm() throws IOException { - tenum.cacheCurrentTerm(); + public SeekStatus seek(TermState state) throws IOException { + throw new UnsupportedOperationException(getClass().getName()+" does not support seeking"); } - + + /** + * Returns the filtered enums term state + */ + @Override + public TermState termState() throws IOException { + assert tenum != null; + return tenum.termState(); + } + @SuppressWarnings("fallthrough") @Override public BytesRef next() throws IOException { Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java =================================================================== --- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.BooleanClause.Occur; // javadocs only +import org.apache.lucene.util.PerReaderTermState; /** * Wraps any {@link MultiTermQuery} as a {@link SpanQuery}, @@ -153,7 +154,7 @@ } @Override - protected void 
addClause(SpanOrQuery topLevel, Term term, int docCount, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); @@ -202,7 +203,7 @@ } @Override - protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost) { + protected void addClause(SpanOrQuery topLevel, Term term, int docFreq, float boost, PerReaderTermState states) { final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); Index: lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (working copy) @@ -25,9 +25,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; /** * Base rewrite method for collecting only the top terms @@ -78,12 +80,12 @@ this.termComp = termsEnum.getComparator(); // lazy init the initial ScoreTerm because comparator is not known on ctor: if (st == null) - st = new ScoreTerm(this.termComp); + st = new ScoreTerm(this.termComp, new PerReaderTermState(topReaderContext)); boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); } @Override - public boolean collect(BytesRef bytes) { + public boolean collect(BytesRef bytes) throws IOException { final float boost = boostAtt.getBoost(); // ignore uncompetetive hits if (stQueue.size() == maxSize) { @@ -94,23 +96,27 @@ return true; } ScoreTerm t = visitedTerms.get(bytes); + final TermState state = termsEnum.termState(); + assert state != null; if (t 
!= null) { // if the term is already in the PQ, only update docFreq of term in PQ - t.docFreq += termsEnum.docFreq(); assert t.boost == boost : "boost should be equal in all segment TermsEnums"; + t.termState.register(state, readerContext.ord); } else { // add new entry in PQ, we must clone the term, else it may get overwritten! st.bytes.copy(bytes); st.boost = boost; - st.docFreq = termsEnum.docFreq(); visitedTerms.put(st.bytes, st); + assert st.termState.docFreq() == 0; + st.termState.register(state, readerContext.ord); stQueue.offer(st); // possibly drop entries from queue if (stQueue.size() > maxSize) { st = stQueue.poll(); visitedTerms.remove(st.bytes); + st.termState.clear(); // reset the termstate! } else { - st = new ScoreTerm(termComp); + st = new ScoreTerm(termComp, new PerReaderTermState(topReaderContext)); } assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; // set maxBoostAtt with values to help FuzzyTermsEnum to optimize @@ -120,6 +126,7 @@ maxBoostAtt.setCompetitiveTerm(t.bytes); } } + return true; } }); @@ -130,8 +137,8 @@ ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp); for (final ScoreTerm st : scoreTerms) { final Term term = placeholderTerm.createTerm(st.bytes); - assert reader.docFreq(term) == st.docFreq; - addClause(q, term, st.docFreq, query.getBoost() * st.boost); // add to query + assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq(); + addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query } query.incTotalNumberOfTerms(scoreTerms.length); return q; @@ -147,7 +154,7 @@ if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; - final TopTermsRewrite other = (TopTermsRewrite) obj; + final TopTermsRewrite other = (TopTermsRewrite) obj; if (size != other.size) return false; return true; } @@ -163,13 +170,12 @@ static final class ScoreTerm 
implements Comparable { public final Comparator termComp; - public final BytesRef bytes = new BytesRef(); public float boost; - public int docFreq; - - public ScoreTerm(Comparator termComp) { + public final PerReaderTermState termState; + public ScoreTerm(Comparator termComp, PerReaderTermState termState) { this.termComp = termComp; + this.termState = termState; } public int compareTo(ScoreTerm other) { Index: lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java (working copy) @@ -21,9 +21,15 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.PerReaderTermState; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; class ConstantScoreAutoRewrite extends TermCollectingRewrite { @@ -71,8 +77,8 @@ } @Override - protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/) { - topLevel.add(new TermQuery(term, docFreq), BooleanClause.Occur.SHOULD); + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, PerReaderTermState states) { + topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD); } @Override @@ -98,9 +104,10 @@ final BytesRefHash pendingTerms = col.pendingTerms; final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); for(int i = 0; i < size; i++) { + final int pos = sort[i]; // docFreq is not used for constant score here, we pass 
1 // to explicitely set a fake value, so it's not calculated - addClause(bq, placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1, 1.0f); + addClause(bq, placeholderTerm.createTerm(pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); } // Strip scores final Query result = new ConstantScoreQuery(bq); @@ -123,12 +130,21 @@ @Override public boolean collect(BytesRef bytes) throws IOException { - pendingTerms.add(bytes); + int pos = pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; } + + final TermState termState = termsEnum.termState(); + assert termState != null; + if (pos < 0) { + pos = (-pos)-1; + array.termState[pos].register(termState, readerContext.ord); + } else { + array.termState[pos] = new PerReaderTermState(topReaderContext, termState, readerContext.ord); + } return true; } @@ -137,7 +153,8 @@ TermsEnum termsEnum; final int docCountCutoff, termCountLimit; - final BytesRefHash pendingTerms = new BytesRefHash(); + final TermStateByteStart array = new TermStateByteStart(16); + final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); } @Override @@ -166,4 +183,40 @@ return true; } + + /** Special implementation of BytesStartArray that keeps parallel arrays for {@link PerReaderTermState} */ + static final class TermStateByteStart extends DirectBytesStartArray { + PerReaderTermState[] termState; + + public TermStateByteStart(int initSize) { + super(initSize); + } + + @Override + public int[] init() { + final int[] ord = super.init(); + termState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] grow() { + final int[] ord = super.grow(); + if (termState.length < ord.length) { + PerReaderTermState[] 
tmpTermState = new PerReaderTermState[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)]; + System.arraycopy(termState, 0, tmpTermState, 0, termState.length); + termState = tmpTermState; + } + assert termState.length >= ord.length; + return ord; + } + + @Override + public int[] clear() { + termState = null; + return super.clear(); + } + + } } Index: lucene/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -22,10 +22,14 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation.IDFExplanation; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. 
@@ -33,18 +37,22 @@ */ public class TermQuery extends Query { private final Term term; - private final int docFreq; + private int docFreq; + private transient PerReaderTermState perReaderTermState; private class TermWeight extends Weight { private final Similarity similarity; private float value; - private float idf; + private final float idf; private float queryNorm; private float queryWeight; - private IDFExplanation idfExp; + private final IDFExplanation idfExp; + private final transient PerReaderTermState termStates; - public TermWeight(IndexSearcher searcher) + public TermWeight(IndexSearcher searcher, PerReaderTermState termStates, int docFreq) throws IOException { + assert termStates != null : "PerReaderTermState must not be null"; + this.termStates = termStates; this.similarity = getSimilarity(searcher); if (docFreq != -1) { idfExp = similarity.idfExplain(term, searcher, docFreq); @@ -76,21 +84,43 @@ value = queryWeight * idf; // idf for document } - @Override - public Scorer scorer(ReaderContext context, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + private DocsEnum getDocsEnum(ReaderContext context, String field, BytesRef bytes) throws IOException { final IndexReader reader = context.reader; - DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), - term.field(), - term.bytes()); - - if (docs == null) { - return null; + if (context.isAtomic) { // TODO: we should assert to atomic eventually + assert assertTopReaderContext(termStates, context) : "The top-reader used to create Weight is not the same as the current reader's top-reader"; + final TermState state = termStates.get(((AtomicReaderContext)context).ord); + if (state == null) { // term is not present in that reader + assert termNotInReader(reader, field, bytes) : "no termstate found but term exists in reader"; + return null; + } + return reader.termDocsEnum(reader.getDeletedDocs(), field, state); + } else { + return reader.termDocsEnum(reader.getDeletedDocs(), field, 
bytes); } - - return new TermScorer(this, docs, similarity, reader.norms(term.field())); } @Override + public Scorer scorer(ReaderContext context, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + final String field = term.field(); + final DocsEnum docs = getDocsEnum(context, term.field(), term.bytes()); + return docs == null ? null : new TermScorer(this, docs, similarity, context.reader.norms(field)); + } + + private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { + // only called from assert + final Terms terms = reader.terms(field); + return terms == null || terms.docFreq(bytes) == 0; + } + + private boolean assertTopReaderContext(PerReaderTermState state, ReaderContext context) { + while(context.parent != null) { + context = context.parent; + } + return state.topReaderContext == context; + } + + + @Override public Explanation explain(ReaderContext context, int doc) throws IOException { final IndexReader reader = context.reader; @@ -142,7 +172,7 @@ fieldExpl.addDetail(expl); Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); + final byte[] fieldNorms = reader.norms(field); float fieldNorm = fieldNorms!=null ? similarity.decodeNormValue(fieldNorms[doc]) : 1.0f; fieldNormExpl.setValue(fieldNorm); @@ -178,14 +208,39 @@ public TermQuery(Term t, int docFreq) { term = t; this.docFreq = docFreq; + perReaderTermState = null; } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. */ + public TermQuery(Term t, PerReaderTermState states) { + assert states != null; + term = t; + docFreq = states.docFreq(); + perReaderTermState = states; + } /** Returns the term of this query. 
*/ public Term getTerm() { return term; } @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new TermWeight(searcher); + final ReaderContext context = searcher.getTopReaderContext(); + final int weightDocFreq; + final PerReaderTermState termState; + if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { + // make TermQuery single-pass if we don't have a PRTS or if the context differs! + termState = PerReaderTermState.build(context, term); + // we must not ignore the given docFreq - if set use the given value + weightDocFreq = docFreq == -1 ? termState.docFreq() : docFreq; + } else { + // PRTS was pre-build for this IS + termState = this.perReaderTermState; + weightDocFreq = docFreq; + } + + return new TermWeight(searcher, termState, weightDocFreq); } @Override Index: lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/TermCollectingRewrite.java (working copy) @@ -18,8 +18,6 @@ */ import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Comparator; import org.apache.lucene.index.Fields; @@ -27,25 +25,33 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PerReaderTermState; import org.apache.lucene.util.ReaderUtil; abstract class TermCollectingRewrite extends MultiTermQuery.RewriteMethod { + /** Return a suitable top-level Query for holding all expanded terms. 
*/ protected abstract Q getTopLevelQuery() throws IOException; /** Add a MultiTermQuery term to the top-level query */ - protected abstract void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException; + protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException { + addClause(topLevel, term, docCount, boost, null); + } + protected abstract void addClause(Q topLevel, Term term, int docCount, float boost, PerReaderTermState states) throws IOException; + + protected final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final List subReaders = new ArrayList(); - ReaderUtil.gatherSubReaders(subReaders, reader); + ReaderContext topReaderContext = reader.getTopReaderContext(); Comparator lastTermComp = null; - - for (IndexReader r : subReaders) { - final Fields fields = r.fields(); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); + for (AtomicReaderContext context : leaves) { + final Fields fields = context.reader.fields(); if (fields == null) { // reader has no fields continue; @@ -68,11 +74,10 @@ if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp) throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp); lastTermComp = newTermComp; - + collector.setReaderContext(topReaderContext, context); collector.setNextEnum(termsEnum); BytesRef bytes; while ((bytes = termsEnum.next()) != null) { - termsEnum.cacheCurrentTerm(); if (!collector.collect(bytes)) return; // interrupt whole term collection, so also don't iterate other subReaders } @@ -80,6 +85,14 @@ } protected static abstract class TermCollector { + + protected AtomicReaderContext readerContext; + protected ReaderContext topReaderContext; + + public void setReaderContext(ReaderContext topReaderContext, AtomicReaderContext readerContext) { + this.readerContext = readerContext; + 
this.topReaderContext = topReaderContext; + } /** attributes used for communication with the enum */ public final AttributeSource attributes = new AttributeSource(); Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; // javadoc +import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ThreadInterruptedException; @@ -145,7 +146,7 @@ * @lucene.experimental */ public IndexSearcher(ReaderContext context) { - this(context, null); + this(context, (ExecutorService) null); } // convenience ctor for other IR based ctors @@ -155,17 +156,11 @@ private IndexSearcher(ReaderContext context, boolean closeReader, ExecutorService executor) { assert context.isTopLevel: "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader; - reader = context.reader; this.executor = executor; this.closeReader = closeReader; - this.readerContext = context; - if (context.isAtomic) { - assert context.leaves() == null : "AtomicReaderContext must not have any leaves"; - this.leafContexts = new AtomicReaderContext[] { (AtomicReaderContext) context }; - } else { - assert context.leaves() != null : "non-atomic top-level context must have leaves"; - this.leafContexts = context.leaves(); - } + readerContext = context; + reader = context.reader; + leafContexts = ReaderUtil.leaves(context); if (executor == null) { subSearchers = null; @@ -175,13 +170,26 @@ if (leafContexts[i].reader == context.reader) { subSearchers[i] = this; } else { - subSearchers[i] = new IndexSearcher(leafContexts[i].reader.getTopReaderContext()); // we need to get a TL 
context for sub searchers! + subSearchers[i] = new IndexSearcher(context, leafContexts[i]); } } } } + + /* Ctor for concurrent sub-searchers searching only on a specific leaf of the given top-reader context + * - instead of searching over all leaves this searcher only searches a single leaf searcher slice. Hence, + * for scorer and filter this looks like an ordinary search in the hierarchy such that there is no difference + * between single and multi-threaded */ + private IndexSearcher(ReaderContext context, AtomicReaderContext leaf) { + reader = context.reader; + executor = null; + closeReader = false; + readerContext = context; + leafContexts = new AtomicReaderContext[] {leaf}; + subSearchers = null; + } - /** Return the {@link IndexReader} this searches. */ + /** Return the top-level {@link IndexReader} this searches. */ public IndexReader getIndexReader() { return reader; } @@ -365,7 +373,7 @@ for (int i = 0; i < subSearchers.length; i++) { // search each sub runner.submit( - new MultiSearcherCallableNoSort(lock, subSearchers[i], weight, filter, nDocs, hq, leafContexts[i].docBase)); + new SearchCallable(lock, subSearchers[i], weight, filter, nDocs, hq)); } int totalHits = 0; @@ -434,7 +442,7 @@ final ExecutionHelper runner = new ExecutionHelper(executor); for (int i = 0; i < subSearchers.length; i++) { // search each sub runner.submit( - new MultiSearcherCallableWithSort(lock, subSearchers[i], weight, filter, nDocs, hq, sort, leafContexts[i].docBase)); + new MultiSearcherCallableWithSort(lock, subSearchers[i], weight, filter, nDocs, hq, sort)); } int totalHits = 0; float maxScore = Float.NEGATIVE_INFINITY; @@ -621,25 +629,23 @@ /** * A thread subclass for searching a single searchable */ - private static final class MultiSearcherCallableNoSort implements Callable { + private static final class SearchCallable implements Callable { private final Lock lock; private final IndexSearcher searchable; private final Weight weight; private final Filter filter; private 
final int nDocs; - private final HitQueue hq; - private final int docBase; + private final PriorityQueue hq; - public MultiSearcherCallableNoSort(Lock lock, IndexSearcher searchable, Weight weight, - Filter filter, int nDocs, HitQueue hq, int docBase) { + public SearchCallable(Lock lock, IndexSearcher searchable, Weight weight, + Filter filter, int nDocs, PriorityQueue hq) { this.lock = lock; this.searchable = searchable; this.weight = weight; this.filter = filter; this.nDocs = nDocs; this.hq = hq; - this.docBase = docBase; } public TopDocs call() throws IOException { @@ -647,7 +653,6 @@ final ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq final ScoreDoc scoreDoc = scoreDocs[j]; - scoreDoc.doc += docBase; // convert doc //it would be so nice if we had a thread-safe insert lock.lock(); try { @@ -673,36 +678,21 @@ private final Filter filter; private final int nDocs; private final FieldDocSortedHitQueue hq; - private final int docBase; private final Sort sort; public MultiSearcherCallableWithSort(Lock lock, IndexSearcher searchable, Weight weight, - Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort, int docBase) { + Filter filter, int nDocs, FieldDocSortedHitQueue hq, Sort sort) { this.lock = lock; this.searchable = searchable; this.weight = weight; this.filter = filter; this.nDocs = nDocs; this.hq = hq; - this.docBase = docBase; this.sort = sort; } public TopFieldDocs call() throws IOException { final TopFieldDocs docs = searchable.search (weight, filter, nDocs, sort); - // If one of the Sort fields is FIELD_DOC, need to fix its values, so that - // it will break ties by doc Id properly. Otherwise, it will compare to - // 'relative' doc Ids, that belong to two different searchables. 
- for (int j = 0; j < docs.fields.length; j++) { - if (docs.fields[j].getType() == SortField.DOC) { - // iterate over the score docs and change their fields value - for (int j2 = 0; j2 < docs.scoreDocs.length; j2++) { - FieldDoc fd = (FieldDoc) docs.scoreDocs[j2]; - fd.fields[j] = Integer.valueOf(((Integer) fd.fields[j]).intValue() + docBase); - } - break; - } - } lock.lock(); try { @@ -714,7 +704,6 @@ final ScoreDoc[] scoreDocs = docs.scoreDocs; for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq final FieldDoc fieldDoc = (FieldDoc) scoreDocs[j]; - fieldDoc.doc += docBase; // convert doc //it would be so nice if we had a thread-safe insert lock.lock(); try { Index: lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; @@ -245,11 +246,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - actualEnum.cacheCurrentTerm(); - } - - @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { return actualEnum.docs(skipDocs, reuse); } @@ -260,7 +256,16 @@ return actualEnum.docsAndPositions(skipDocs, reuse); } + public SeekStatus seek(TermState state) throws IOException { + return actualEnum.seek(state); + } + @Override + public TermState termState() throws IOException { + return actualEnum.termState(); + } + + @Override public Comparator getComparator() throws IOException { return actualEnum.getComparator(); } Index: 
lucene/src/java/org/apache/lucene/index/TermState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/TermState.java (revision 0) @@ -0,0 +1,53 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Holds all state required for {@link TermsEnum} to produce a {@link DocsEnum} + * without re-seeking the terms dict. 
+ * + * @lucene.experimental + */ +public abstract class TermState implements Cloneable { + + /** + * Copies the content of the given {@link TermState} to this instance + * + * @param other + * the TermState to copy + */ + public abstract void copy(TermState other); + + /** + * Returns the document frequency of the term this {@link TermState} was + * created for in the creating {@link TermsEnum} + * @return the document frequency of the term this {@link TermState} was + * created for in the creating {@link TermsEnum} + */ + public abstract int docFreq(); + + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException cnse) { + // should not happen + throw new RuntimeException(cnse); + } + } +} \ No newline at end of file Property changes on: lucene\src\java\org\apache\lucene\index\TermState.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/MultiReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiReader.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/MultiReader.java (working copy) @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Collection; -import java.util.HashMap; import java.util.Map; import org.apache.lucene.document.Document; Index: lucene/src/java/org/apache/lucene/index/BufferedDeletes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (working copy) @@ -373,7 +373,6 @@ Query query = entry.getKey(); int limit = entry.getValue().intValue(); Weight weight = query.weight(searcher); - Scorer scorer = weight.scorer(readerContext, true, false); if (scorer != null) { while(true) { Index: 
lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -131,11 +131,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - in.cacheCurrentTerm(); - } - - @Override public SeekStatus seek(long ord) throws IOException { return in.seek(ord); } @@ -174,6 +169,16 @@ public Comparator getComparator() throws IOException { return in.getComparator(); } + + @Override + public SeekStatus seek(TermState state) throws IOException { + return in.seek(state); + } + + @Override + public TermState termState() throws IOException { + return in.termState(); + } } /** Base class for filtering {@link DocsEnum} implementations. */ Index: lucene/src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsEnum.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -73,6 +73,20 @@ * may be before or after the current ord. See {@link * #seek(BytesRef)}. */ public abstract SeekStatus seek(long ord) throws IOException; + + /** + * Expert: Seeks to the specified term by {@link TermState} as previously + * returned by {@link #termState()}. Callers should maintain the {@link TermState} + * to use this method. + * Low-level implementations will position the TermsEnum without re-seeking + * the term dictionary. Using this method with a {@link TermState} obtained + * from a different {@link Terms} instance will leave the {@link TermsEnum} in + * undefined state. + * */ + public SeekStatus seek(TermState state) throws IOException { + return seek(((SimpleTermState)state).bytes); + } + /** Increments the enumeration to the next element. 
* Returns the resulting term, or null if the end was @@ -98,7 +112,7 @@ * first time, after next() returns null or seek returns * {@link SeekStatus#END}.*/ public abstract int docFreq(); - + /** Get {@link DocsEnum} for the current term. Do not * call this before calling {@link #next} or {@link * #seek} for the first time. This method will not @@ -116,6 +130,20 @@ * the postings by this codec. */ public abstract DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException; + /** + * Expert: Returns the TermsEnums internal state to position the TermsEnum + * without re-seeking the term dictionary. + * + * @see TermState + * @see #seek(TermState) + */ + public TermState termState() throws IOException { + SimpleTermState state = new SimpleTermState(); + state.docFreq = docFreq(); + state.bytes = new BytesRef(term()); + return state; + } + /** Return the {@link BytesRef} Comparator used to sort * terms provided by the iterator. This may return * null if there are no terms. Callers may invoke this @@ -123,10 +151,6 @@ * instance & reuse it. */ public abstract Comparator getComparator() throws IOException; - /** Optional optimization hint: informs the codec that the - * current term is likely to be re-seek'd-to soon. */ - public abstract void cacheCurrentTerm() throws IOException; - /** An empty TermsEnum for quickly returning an empty instance e.g. * in {@link org.apache.lucene.search.MultiTermQuery} *

Please note: This enum should be unmodifiable, @@ -142,9 +166,6 @@ public SeekStatus seek(long ord) { return SeekStatus.END; } @Override - public void cacheCurrentTerm() {} - - @Override public BytesRef term() { throw new IllegalStateException("this method should never be called"); } @@ -183,5 +204,36 @@ public synchronized AttributeSource attributes() { return super.attributes(); } + + @Override + public TermState termState() throws IOException { + throw new IllegalStateException("this method should never be called"); + } + + @Override + public SeekStatus seek(TermState state) throws IOException { + throw new IllegalStateException("this method should never be called"); + } }; + + static class SimpleTermState extends TermState { + BytesRef bytes; + int docFreq; + + @Override + public void copy(TermState other) { + if (other instanceof SimpleTermState) { + SimpleTermState simpleOther = (SimpleTermState)other; + this.bytes = simpleOther.bytes; + this.docFreq = simpleOther.docFreq; + } else { + throw new UnsupportedOperationException("cannot copy TermState from " + other.getClass().getName()); + } + } + + @Override + public int docFreq() { + return docFreq; + } + } } Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -1070,7 +1070,48 @@ return null; } } + + /** + * Returns {@link DocsEnum} for the specified field and + * {@link TermState}. 
This may return null, if either the field or the term + does not exist or the {@link TermState} is invalid for the underlying + implementation.*/ + public DocsEnum termDocsEnum(Bits skipDocs, String field, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docs(skipDocs, state, null); + } else { + return null; + } + } + + /** + * Returns {@link DocsAndPositionsEnum} for the specified field and + * {@link TermState}. This may return null, if either the field or the term + * does not exist, the {@link TermState} is invalid for the underlying + * implementation, or positions were not stored for this term.*/ + public DocsAndPositionsEnum termPositionsEnum(Bits skipDocs, String field, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields == null) { + return null; + } + final Terms terms = fields.terms(field); + if (terms != null) { + return terms.docsAndPositions(skipDocs, state, null); + } else { + return null; + } + } + + /** Deletes the document numbered docNum. Once a document is * deleted it will not appear in TermDocs or TermPositions enumerations. 
* Attempts to read its field with the {@link #document} Index: lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -91,13 +91,6 @@ } @Override - public void cacheCurrentTerm() throws IOException { - for(int i=0;i getComparator() { return termComp; } Index: lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (working copy) @@ -22,8 +22,9 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.index.codecs.TermState; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.codecs.PostingsReaderBase; +import org.apache.lucene.index.codecs.PrefixCodedTermState; import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Document; import org.apache.lucene.index.codecs.pulsing.PulsingPostingsWriterImpl.Position; import org.apache.lucene.store.IndexInput; @@ -57,14 +58,14 @@ wrappedPostingsReader.init(termsIn); } - private static class PulsingTermState extends TermState { + private static final class PulsingTermState extends PrefixCodedTermState { private Document docs[]; - private TermState wrappedTermState; + private PrefixCodedTermState wrappedTermState; private boolean pendingIndexTerm; + @Override public Object clone() { - PulsingTermState clone; - clone = (PulsingTermState) super.clone(); + final PulsingTermState clone = (PulsingTermState) super.clone(); clone.docs = docs.clone(); for(int i=0;i 
termComp; // Caches the most recently looked-up field + terms: - private final DoubleBarrelLRUCache termsCache; + private final DoubleBarrelLRUCache termsCache; // Reads the terms index private TermsIndexReaderBase indexReader; @@ -84,11 +85,6 @@ public FieldAndTerm() { } - public FieldAndTerm(String field, BytesRef term) { - this.field = field; - this.term = new BytesRef(term); - } - public FieldAndTerm(FieldAndTerm other) { field = other.field; term = new BytesRef(other.term); @@ -116,7 +112,7 @@ throws IOException { this.postingsReader = postingsReader; - termsCache = new DoubleBarrelLRUCache(termsCacheSize); + termsCache = new DoubleBarrelLRUCache(termsCacheSize); this.termComp = termComp; @@ -278,10 +274,10 @@ } // Iterates through terms in this field, not supporting ord() - private class SegmentTermsEnum extends TermsEnum { + private final class SegmentTermsEnum extends TermsEnum { private final IndexInput in; private final DeltaBytesReader bytesReader; - private final TermState state; + private final PrefixCodedTermState state; private boolean seekPending; private final FieldAndTerm fieldTerm = new FieldAndTerm(); private final TermsIndexReaderBase.FieldIndexEnum indexEnum; @@ -307,14 +303,6 @@ return termComp; } - @Override - public void cacheCurrentTerm() { - TermState stateCopy = (TermState) state.clone(); - stateCopy.filePointer = in.getFilePointer(); - termsCache.put(new FieldAndTerm(fieldInfo.name, bytesReader.term), - stateCopy); - } - // called only from assert private boolean first; private int indexTermCount; @@ -342,7 +330,7 @@ * is found, SeekStatus.NOT_FOUND if a different term * was found, SeekStatus.END if we hit EOF */ @Override - public SeekStatus seek(BytesRef term, boolean useCache) throws IOException { + public SeekStatus seek(final BytesRef term, final boolean useCache) throws IOException { if (indexEnum == null) { throw new IllegalStateException("terms index was not loaded"); @@ -352,19 +340,14 @@ // Check cache fieldTerm.term = term; - 
TermState cachedState; if (useCache) { - cachedState = termsCache.get(fieldTerm); + final PrefixCodedTermState cachedState = termsCache.get(fieldTerm); if (cachedState != null) { - state.copy(cachedState); - seekPending = true; + setTermState(cachedState); positioned = false; - bytesReader.term.copy(term); //System.out.println(" cached!"); return SeekStatus.FOUND; } - } else { - cachedState = null; } boolean doSeek = true; @@ -439,12 +422,7 @@ if (cmp == 0) { // Done! if (useCache) { - // Store in cache - FieldAndTerm entryKey = new FieldAndTerm(fieldTerm); - cachedState = (TermState) state.clone(); - // this is fp after current term - cachedState.filePointer = in.getFilePointer(); - termsCache.put(entryKey, cachedState); + cacheTerm(fieldTerm); } return SeekStatus.FOUND; @@ -464,6 +442,23 @@ return SeekStatus.END; } + private final void setTermState(final TermState termState) { + assert termState != null; + state.copy(termState); + seekPending = true; + //bytesReader.term.copy(term); + } + + private final void cacheTerm(FieldAndTerm other) { + // Store in cache + final FieldAndTerm entryKey = new FieldAndTerm(other); + final PrefixCodedTermState cachedState = (PrefixCodedTermState) state.clone(); + // this is fp after current term + cachedState.filePointer = in.getFilePointer(); + termsCache.put(entryKey, cachedState); + } + + @Override public BytesRef term() { return bytesReader.term; @@ -512,7 +507,7 @@ @Override public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException { - DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); + final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, skipDocs, reuse); assert docsEnum != null; return docsEnum; } @@ -527,6 +522,22 @@ } @Override + public SeekStatus seek(TermState otherState) throws IOException { + assert otherState != null && otherState instanceof PrefixCodedTermState; + assert otherState.getClass() == this.state.getClass() : "Illegal TermState type " + 
otherState.getClass(); + assert ((PrefixCodedTermState)otherState).ord() < numTerms; + setTermState(otherState); + return SeekStatus.FOUND; + } + + @Override + public TermState termState() throws IOException { + final PrefixCodedTermState newTermState = (PrefixCodedTermState) state.clone(); + newTermState.filePointer = in.getFilePointer(); + return newTermState; + } + + @Override public SeekStatus seek(long ord) throws IOException { if (indexEnum == null) { @@ -562,7 +573,6 @@ return SeekStatus.FOUND; } - @Override public long ord() { if (!doOrd) { throw new UnsupportedOperationException(); Index: lucene/src/java/org/apache/lucene/index/Terms.java =================================================================== --- lucene/src/java/org/apache/lucene/index/Terms.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/index/Terms.java (working copy) @@ -80,6 +80,37 @@ } } + /** + * Expert: Get {@link DocsEnum} for the specified {@link TermState}. + * This method may return null if the term does not exist. + * + * @see TermsEnum#termState() + * @see TermsEnum#seek(TermState) */ + public DocsEnum docs(Bits skipDocs, TermState termState, DocsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docs(skipDocs, reuse); + } else { + return null; + } + } + + /** + * Get {@link DocsAndPositionsEnum} for the specified {@link TermState}. This + * method may return null if the term does not exist, or positions were + * not indexed. 
+ * + * @see TermsEnum#termState() + * @see TermsEnum#seek(BytesRef, TermState) */ + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, TermState termState, DocsAndPositionsEnum reuse) throws IOException { + final TermsEnum termsEnum = getThreadTermsEnum(); + if (termsEnum.seek(termState) == TermsEnum.SeekStatus.FOUND) { + return termsEnum.docsAndPositions(skipDocs, reuse); + } else { + return null; + } + } + public long getUniqueTermCount() throws IOException { throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()"); } Index: lucene/src/java/org/apache/lucene/util/ReaderUtil.java =================================================================== --- lucene/src/java/org/apache/lucene/util/ReaderUtil.java (revision 1057163) +++ lucene/src/java/org/apache/lucene/util/ReaderUtil.java (working copy) @@ -212,6 +212,22 @@ } } + + /** + * Returns the context's leaves or the context itself as the only element of + * the returned array. If the context's #leaves() method returns + * null the given context must be an instance of + * {@link AtomicReaderContext} + */ + public static AtomicReaderContext[] leaves(ReaderContext context) { + assert context != null && context.isTopLevel : "context must be non-null & top-level"; + final AtomicReaderContext[] leaves = context.leaves(); + if (leaves == null) { + assert context.isAtomic : "top-level context without leaves must be atomic"; + return new AtomicReaderContext[] {(AtomicReaderContext) context}; + } + return leaves; + } /** Index: lucene/src/java/org/apache/lucene/util/PerReaderTermState.java =================================================================== --- lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/PerReaderTermState.java (revision 0) @@ -0,0 +1,149 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.TermsEnum.SeekStatus; + +/** + * Maintains a {@link IndexReader} {@link TermState} view over + * {@link IndexReader} instances containing a single term. The + * {@link PerReaderTermState} doesn't track if the given {@link TermState} + * objects are valid, neither if the {@link TermState} instances refer to the + * same terms in the associated readers. + * + * @lucene.experimental + */ +public final class PerReaderTermState { + public final ReaderContext topReaderContext; // for asserting! 
+ private final TermState[] states; + private int docFreq; + + /** + * Creates an empty {@link PerReaderTermState} from a {@link ReaderContext} + */ + public PerReaderTermState(ReaderContext context) { + assert context != null && context.isTopLevel; + topReaderContext = context; + docFreq = 0; + final int len; + if (context.leaves() == null) { + len = 1; + } else { + len = context.leaves().length; + } + states = new TermState[len]; + } + + /** + * Creates a {@link PerReaderTermState} with an initial {@link TermState}, + * {@link IndexReader} pair. + */ + public PerReaderTermState(ReaderContext context, TermState state, int ord) { + this(context); + register(state, ord); + } + + /** + * Creates a {@link PerReaderTermState} from a top-level {@link ReaderContext} and the + * given {@link Term}. This method will lookup the given term in all context's leaf readers + * and register each of the readers containing the term in the returned {@link PerReaderTermState} + * using the leaf reader's ordinal. + *

+ Note: the given context must be a top-level context. + */ + public static PerReaderTermState build(ReaderContext context, Term term) + throws IOException { + assert context != null && context.isTopLevel; + final String field = term.field(); + final BytesRef bytes = term.bytes(); + final PerReaderTermState perReaderTermState = new PerReaderTermState(context); + final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); + for (int i = 0; i < leaves.length; i++) { + final Fields fields = leaves[i].reader.fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + TermsEnum termsEnum = terms.iterator(); + if (SeekStatus.FOUND == termsEnum.seek(bytes, false)) { // no cache + // here! + final TermState termState = termsEnum.termState(); + perReaderTermState.register(termState, i); + } + } + } + } + return perReaderTermState; + } + + /** + * Clears the {@link PerReaderTermState} internal state and removes all + * registered {@link TermState}s + */ + public void clear() { + docFreq = 0; + Arrays.fill(states, null); + } + + /** + * Registers and associates a {@link TermState} with a leaf ordinal. The leaf ordinal + * should be derived from a {@link ReaderContext}'s leaf ord. + */ + public void register(TermState state, final int ord) { + assert state != null : "state must not be null"; + assert ord >= 0 && ord < states.length; + assert states[ord] == null : "state for ord: " + ord + + " already registered"; + docFreq += state.docFreq(); + states[ord] = state; + } + + /** + * Returns the {@link TermState} for a leaf ordinal or null if no + * {@link TermState} for the ordinal was registered. + * + * @param ord + * the reader's leaf ordinal to get the {@link TermState} for. 
+ * @return the {@link TermState} for the given readers ord or null if no + * {@link TermState} for the reader was registered + */ + public TermState get(int ord) { + assert ord >= 0 && ord < states.length; + return states[ord]; + } + + /** + * Returns the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int)}. + * @return the accumulated document frequency of all {@link TermState} + * instances passed to {@link #register(TermState, int)}. + */ + public int docFreq() { + return docFreq; + } +} \ No newline at end of file Property changes on: lucene\src\java\org\apache\lucene\util\PerReaderTermState.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java =================================================================== --- lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (revision 1057163) +++ lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (working copy) @@ -19,9 +19,13 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.codecs.PrefixCodedTermState; + +import java.io.IOException; import java.util.Arrays; import java.util.Comparator; @@ -91,10 +95,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public BytesRef term() { return br; } @@ -129,5 +129,19 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public TermState termState() throws IOException { + final PrefixCodedTermState state = new 
PrefixCodedTermState(); + state.docFreq = docFreq(); + state.ord = upto - start; + return state; + } + + @Override + public SeekStatus seek(TermState state) throws IOException { + assert state != null && state instanceof PrefixCodedTermState; + return seek(((PrefixCodedTermState)state).ord()); // just use the ord for simplicity + } } Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1057163) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -38,6 +38,7 @@ import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Fields; +import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.FieldsEnum; @@ -883,10 +884,6 @@ } @Override - public void cacheCurrentTerm() { - } - - @Override public long ord() { return termUpto; } @@ -916,8 +913,38 @@ public Comparator getComparator() { return BytesRef.getUTF8SortedAsUnicodeComparator(); } + + @Override + public SeekStatus seek(TermState state) throws IOException { + assert state != null; + return this.seek(((MemoryTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + MemoryTermState ts = new MemoryTermState(); + ts.ord = termUpto; + return ts; + } } + + private class MemoryTermState extends TermState { + int ord; + + @Override + public void copy(TermState other) { + if (other instanceof MemoryTermState) + this.ord = ((MemoryTermState) other).ord; + else + throw new UnsupportedOperationException("cannot copy termstate from: " + other.getClass().getName()); + } + @Override + public int docFreq() { + return 1; + } + } + private class MemoryDocsEnum extends DocsEnum { private ArrayIntList 
positions; private boolean hasNext;