Index: modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java =================================================================== --- modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 1203747) +++ modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) @@ -39,7 +39,7 @@ import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; import org.apache.lucene.collation.CollationKeyAnalyzer; import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexReader; @@ -491,9 +491,9 @@ continue; } TermsEnum termsEnum = terms.iterator(null); - DocsEnum docs = null; + DocsAndFreqsEnum docs = null; while(termsEnum.next() != null) { - docs = termsEnum.docs(MultiFields.getLiveDocs(reader), docs); + docs = termsEnum.docsAndFreqs(MultiFields.getLiveDocs(reader), docs); while(docs.nextDoc() != docs.NO_MORE_DOCS) { totalTokenCount2 += docs.freq(); } Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java (revision 1203747) +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java (working copy) @@ -43,7 +43,7 @@ final Terms terms = fields.terms(field); return new IntDocValues(this) { - DocsEnum docs ; + DocsAndFreqsEnum docs; int atDoc; int lastDocRequested = -1; @@ -55,7 +55,7 @@ if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(indexedBytes, false)) { - docs = termsEnum.docs(null, null); + docs = termsEnum.docsAndFreqs(null, null); } else { docs = null; } @@ 
-64,7 +64,7 @@ } if (docs == null) { - docs = new DocsEnum() { + docs = new DocsAndFreqsEnum() { @Override public int freq() { return 0; Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java (revision 1203747) +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java (working copy) @@ -51,7 +51,7 @@ final TFIDFSimilarity similarity = (TFIDFSimilarity) sim; return new FloatDocValues(this) { - DocsEnum docs ; + DocsAndFreqsEnum docs ; int atDoc; int lastDocRequested = -1; @@ -62,7 +62,7 @@ if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(indexedBytes, false)) { - docs = termsEnum.docs(null, null); + docs = termsEnum.docsAndFreqs(null, null); } else { docs = null; } @@ -71,7 +71,7 @@ } if (docs == null) { - docs = new DocsEnum() { + docs = new DocsAndFreqsEnum() { @Override public int freq() { return 0; Index: lucene/src/test/org/apache/lucene/search/TestTermVectors.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTermVectors.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/search/TestTermVectors.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.util.English; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestTermVectors extends LuceneTestCase { private IndexSearcher searcher; @@ -261,7 +262,7 @@ knownSearcher.setSimilarityProvider(new DefaultSimilarityProvider()); FieldsEnum fields = MultiFields.getFields(knownSearcher.reader).iterator(); - DocsEnum docs = null; + DocsAndFreqsEnum docs = null; while(fields.next() != null) { Terms terms = fields.terms(); assertNotNull(terms); @@ -269,7 +270,7 @@ 
while (termsEnum.next() != null) { String text = termsEnum.term().utf8ToString(); - docs = termsEnum.docs(MultiFields.getLiveDocs(knownSearcher.reader), docs); + docs = _TestUtil.docsAndFreqs(random, termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs); while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { int docId = docs.docID(); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (working copy) @@ -500,9 +500,9 @@ // Make sure the doc that hit the exception was marked // as deleted: DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - t.field(), - new BytesRef(t.text())); + MultiFields.getLiveDocs(reader), + t.field(), + new BytesRef(t.text())); int count = 0; while(tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Index: lucene/src/test/org/apache/lucene/index/TestOmitPositions.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestOmitPositions.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestOmitPositions.java (working copy) @@ -52,7 +52,7 @@ assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); - DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test")); + DocsAndFreqsEnum de = MultiFields.getTermDocsAndFreqsEnum(reader, null, "foo", new BytesRef("test")); while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { assertEquals(2, de.freq()); } Index: lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (revision 1203747) +++ 
lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java (working copy) @@ -17,7 +17,8 @@ * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -25,10 +26,9 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; -import java.io.IOException; -import java.util.Random; - public class TestDirectoryReader extends LuceneTestCase { protected Directory dir; private Document doc1; @@ -174,8 +174,8 @@ TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator(null); te3.seekCeil(new BytesRef("wow")); - td = te3.docs(MultiFields.getLiveDocs(mr3), - td); + td = _TestUtil.docs(random, te3, MultiFields.getLiveDocs(mr3), + td); int ret = 0; Index: lucene/src/test/org/apache/lucene/index/TestLongPostings.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestLongPostings.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestLongPostings.java (working copy) @@ -366,9 +366,19 @@ if (VERBOSE) { System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term); } - - final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term)); + final DocsEnum docs; + final DocsAndFreqsEnum postings; + + if (options == IndexOptions.DOCS_ONLY) { + docs = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term)); + postings = null; + } else { + docs = postings = MultiFields.getTermDocsAndFreqsEnum(r, null, "field", new BytesRef(term)); + assert postings != null; + } + assert docs != null; + int docID = -1; while(docID < DocsEnum.NO_MORE_DOCS) { final int what = random.nextInt(3); @@ -388,7 +398,7 @@ expected++; 
} } - docID = postings.nextDoc(); + docID = docs.nextDoc(); if (VERBOSE) { System.out.println(" got docID=" + docID); } @@ -397,7 +407,7 @@ break; } - if (random.nextInt(6) == 3) { + if (random.nextInt(6) == 3 && postings != null) { final int freq = postings.freq(); assertTrue(freq >=1 && freq <= 4); } @@ -424,7 +434,7 @@ } } - docID = postings.advance(targetDocID); + docID = docs.advance(targetDocID); if (VERBOSE) { System.out.println(" got docID=" + docID); } @@ -433,7 +443,7 @@ break; } - if (random.nextInt(6) == 3) { + if (random.nextInt(6) == 3 && postings != null) { final int freq = postings.freq(); assertTrue("got invalid freq=" + freq, freq >=1 && freq <= 4); } Index: lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (working copy) @@ -670,7 +670,7 @@ // should be found exactly assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null))); assertNull(terms.next()); // should hit end of field @@ -682,12 +682,12 @@ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, terms.seekCeil(new BytesRef("a"))); assertTrue(terms.term().bytesEquals(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null))); assertNull(terms.next()); assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null))); assertNull(terms.next()); r.close(); Index: lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java =================================================================== --- 
lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java (working copy) @@ -24,9 +24,9 @@ import java.util.Set; import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -38,6 +38,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestTermVectorsReader extends LuceneTestCase { //Must be lexicographically sorted, will do in setup, versus trying to maintain here @@ -232,7 +233,7 @@ //System.out.println("Term: " + term); assertEquals(testTerms[i], term); - docsEnum = termsEnum.docs(null, docsEnum); + docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum); assertNotNull(docsEnum); int doc = docsEnum.docID(); assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS); Index: lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java (working copy) @@ -210,9 +210,9 @@ // Quick test to make sure index is not corrupt: IndexReader reader = IndexReader.open(dir, true); DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - "field", - new BytesRef("aaa")); + MultiFields.getLiveDocs(reader), + "field", + new BytesRef("aaa")); int count = 0; while(tdocs.nextDoc() != 
DocsEnum.NO_MORE_DOCS) { count++; Index: lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java =================================================================== --- lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java (revision 1203747) +++ lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java (working copy) @@ -28,8 +28,9 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; class RepeatingTokenStream extends Tokenizer { @@ -121,7 +122,7 @@ DocsEnum tdocs = null; for (int i=0; iScorer for documents matching a Term. */ final class TermScorer extends Scorer { - private final DocsEnum docsEnum; + private final DocsAndFreqsEnum docsAndFreqsEnum; private int doc = -1; private int freq; @@ -34,7 +34,7 @@ private final int[] docs; private final int[] freqs; - private final DocsEnum.BulkReadResult bulkResult; + private final DocsAndFreqsEnum.BulkReadResult bulkResult; private final Similarity.ExactDocScorer docScorer; /** @@ -48,10 +48,10 @@ * The Similarity.ExactDocScorer implementation * to be used for score computations. 
*/ - TermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { + TermScorer(Weight weight, DocsAndFreqsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { super(weight); this.docScorer = docScorer; - this.docsEnum = td; + this.docsAndFreqsEnum = td; bulkResult = td.getBulkResult(); docs = bulkResult.docs.ints; freqs = bulkResult.freqs.ints; @@ -70,7 +70,7 @@ //System.out.println("TS: collect doc=" + doc); c.collect(doc); // collect score if (++pointer >= pointerMax) { - pointerMax = docsEnum.read(); // refill + pointerMax = docsAndFreqsEnum.readDocsAndFreqs(); // refill if (pointerMax != 0) { pointer = 0; } else { @@ -105,7 +105,7 @@ public int nextDoc() throws IOException { pointer++; if (pointer >= pointerMax) { - pointerMax = docsEnum.read(); // refill + pointerMax = docsAndFreqsEnum.readDocsAndFreqs(); // refill if (pointerMax != 0) { pointer = 0; } else { @@ -127,7 +127,7 @@ /** * Advances to the first match beyond the current whose document number is * greater than or equal to a given target.
- * The implementation uses {@link DocsEnum#advance(int)}. + * The implementation uses {@link DocsAndFreqsEnum#advance(int)}. * * @param target * The target document number. @@ -144,11 +144,11 @@ } // not found in readahead cache, seek underlying stream - int newDoc = docsEnum.advance(target); - //System.out.println("ts.advance docsEnum=" + docsEnum); + int newDoc = docsAndFreqsEnum.advance(target); + //System.out.println("ts.advance docsAndFreqsEnum=" + docsAndFreqsEnum); if (newDoc != NO_MORE_DOCS) { doc = newDoc; - freq = docsEnum.freq(); + freq = docsAndFreqsEnum.freq(); } else { doc = NO_MORE_DOCS; } Index: lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java (working copy) @@ -22,13 +22,14 @@ import java.util.Comparator; import java.util.List; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; @@ -263,6 +264,11 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + return actualEnum.docsAndFreqs(liveDocs, reuse); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { return actualEnum.docsAndPositions(liveDocs, reuse); Index: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java 
=================================================================== --- lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (working copy) @@ -0,0 +1,158 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.similarities.Similarity; + +/** Expert: A Scorer for documents matching a + * Term. It treats all documents as having + * one occurrence (tf=1) for the term. + */ + +final class MatchOnlyTermScorer extends Scorer { + private final DocsEnum docsEnum; + private int doc = -1; + + private int pointer; + private int pointerMax; + + private final int[] docs; + private final DocsEnum.BulkReadResult bulkResult; + private final Similarity.ExactDocScorer docScorer; + + /** + * Construct a MatchOnlyTermScorer. + * + * @param weight + * The weight of the Term in the query. + * @param td + * An iterator over the documents matching the Term. + * @param docScorer + * The Similarity.ExactDocScorer implementation + * to be used for score computations. 
+ */ + MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { + super(weight); + this.docScorer = docScorer; + this.docsEnum = td; + bulkResult = td.getBulkResult(); + docs = bulkResult.docs.ints; + } + + @Override + public void score(Collector c) throws IOException { + score(c, Integer.MAX_VALUE, nextDoc()); + } + + // firstDocID is ignored since nextDoc() sets 'doc' + @Override + public boolean score(Collector c, int end, int firstDocID) throws IOException { + c.setScorer(this); + while (doc < end) { // for docs in window + //System.out.println("TS: collect doc=" + doc); + c.collect(doc); // collect score + if (++pointer >= pointerMax) { + pointerMax = docsEnum.read(); // refill + if (pointerMax != 0) { + pointer = 0; + } else { + doc = NO_MORE_DOCS; // set to sentinel value + return false; + } + } + doc = docs[pointer]; + } + return true; + } + + @Override + public int docID() { + return doc; + } + + @Override + public float freq() { + return 1.0f; + } + + /** + * Advances to the next document matching the query.
+ * The iterator over the matching documents is buffered using + * {@link DocsEnum#read()}. + * + * @return the document matching the query or NO_MORE_DOCS if there are no more documents. + */ + @Override + public int nextDoc() throws IOException { + pointer++; + if (pointer >= pointerMax) { + pointerMax = docsEnum.read(); // refill + if (pointerMax != 0) { + pointer = 0; + } else { + return doc = NO_MORE_DOCS; + } + } + doc = docs[pointer]; + assert doc != NO_MORE_DOCS; + return doc; + } + + @Override + public float score() { + assert doc != NO_MORE_DOCS; + return docScorer.score(doc, 1); + } + + /** + * Advances to the first match beyond the current whose document number is + * greater than or equal to a given target.
+ * The implementation uses {@link DocsEnum#advance(int)}. + * + * @param target + * The target document number. + * @return the matching document or NO_MORE_DOCS if none exist. + */ + @Override + public int advance(int target) throws IOException { + // first scan in cache + for (pointer++; pointer < pointerMax; pointer++) { + if (docs[pointer] >= target) { + return doc = docs[pointer]; + } + } + + // not found in readahead cache, seek underlying stream + int newDoc = docsEnum.advance(target); + //System.out.println("ts.advance docsEnum=" + docsEnum); + if (newDoc != NO_MORE_DOCS) { + doc = newDoc; + } else { + doc = NO_MORE_DOCS; + } + return doc; + } + + /** Returns a string representation of this TermScorer. */ + @Override + public String toString() { return "scorer(" + weight + ")"; } + +} Property changes on: lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native Index: lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (working copy) @@ -17,17 +17,19 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.Comparator; + +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.util.ArrayUtil; -import java.io.IOException; -import java.util.Comparator; /** Scorer for conjunctions, sets of terms, all of which are required. 
*/ -final class ConjunctionTermScorer extends Scorer { - private final float coord; - private int lastDoc = -1; - private final DocsAndFreqs[] docsAndFreqs; +class ConjunctionTermScorer extends Scorer { + protected final float coord; + protected int lastDoc = -1; + protected final DocsAndFreqs[] docsAndFreqs; private final DocsAndFreqs lead; ConjunctionTermScorer(Weight weight, float coord, @@ -35,20 +37,20 @@ super(weight); this.coord = coord; this.docsAndFreqs = docsAndFreqs; - // Sort the array the first time to allow the least frequent DocsEnum to + // Sort the array the first time to allow the least frequent DocsAndFreqsEnum to // lead the matching. ArrayUtil.mergeSort(docsAndFreqs, new Comparator() { public int compare(DocsAndFreqs o1, DocsAndFreqs o2) { - return o1.freq - o2.freq; + return o1.docFreq - o2.docFreq; } }); - lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection + lead = docsAndFreqs[0]; // least frequent DocsAndFreqsEnum leads the intersection } private int doNext(int doc) throws IOException { do { - if (lead.doc == DocsEnum.NO_MORE_DOCS) { + if (lead.doc == DocsAndFreqsEnum.NO_MORE_DOCS) { return NO_MORE_DOCS; } advanceHead: do { @@ -57,11 +59,11 @@ docsAndFreqs[i].doc = docsAndFreqs[i].docs.advance(doc); } if (docsAndFreqs[i].doc > doc) { - // DocsEnum beyond the current doc - break and advance lead + // DocsAndFreqsEnum beyond the current doc - break and advance lead break advanceHead; } } - // success - all DocsEnums are on the same doc + // success - all DocsAndFreqsEnums are on the same doc return doc; } while (true); // advance head for next iteration @@ -90,20 +92,22 @@ public float score() throws IOException { float sum = 0.0f; for (DocsAndFreqs docs : docsAndFreqs) { - sum += docs.docScorer.score(lastDoc, docs.docs.freq()); + sum += docs.docScorer.score(lastDoc, docs.docsAndFreqs.freq()); } return sum * coord; } static final class DocsAndFreqs { + final DocsAndFreqsEnum docsAndFreqs; final DocsEnum docs; - final 
int freq; + final int docFreq; final ExactDocScorer docScorer; int doc = -1; - DocsAndFreqs(DocsEnum docs, int freq, ExactDocScorer docScorer) { + DocsAndFreqs(DocsAndFreqsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) { + this.docsAndFreqs = docsAndFreqs; this.docs = docs; - this.freq = freq; + this.docFreq = docFreq; this.docScorer = docScorer; } } Index: lucene/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -20,19 +20,20 @@ import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.search.similarities.Similarity; -import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.TermContext; -import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. @@ -83,10 +84,18 @@ if (termsEnum == null) { return null; } - // TODO should we reuse the DocsEnum here? 
- final DocsEnum docs = termsEnum.docs(acceptDocs, null); - assert docs != null; - return new TermScorer(this, docs, createDocScorer(context)); + // TODO: would be nice if scorer API passed in + // whether it needs scores; then we could do + // MatchOnlyTermScorer if not + DocsAndFreqsEnum docsAndFreqs = termsEnum.docsAndFreqs(acceptDocs, null); + if (docsAndFreqs != null) { + return new TermScorer(this, docsAndFreqs, createDocScorer(context)); + } else { + // Index does not store freq info + DocsEnum docs = termsEnum.docs(acceptDocs, null); + assert docs != null; + return new MatchOnlyTermScorer(this, docs, createDocScorer(context)); + } } /** @@ -120,12 +129,11 @@ @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - IndexReader reader = context.reader; - DocsEnum docs = reader.termDocsEnum(context.reader.getLiveDocs(), term.field(), term.bytes()); - if (docs != null) { - int newDoc = docs.advance(doc); + Scorer scorer = scorer(context, true, false, context.reader.getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); if (newDoc == doc) { - int freq = docs.freq(); + float freq = scorer.freq(); ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); @@ -136,8 +144,7 @@ return result; } } - - return new ComplexExplanation(false, 0.0f, "no matching term"); + return new ComplexExplanation(false, 0.0f, "no matching term"); } } Index: lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java (working copy) @@ -27,6 +27,7 @@ import java.util.WeakHashMap; import 
org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; @@ -1025,6 +1026,11 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { throw new UnsupportedOperationException(); } Index: lucene/src/java/org/apache/lucene/index/DocTermOrds.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocTermOrds.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/DocTermOrds.java (working copy) @@ -658,6 +658,11 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + return termsEnum.docsAndFreqs(liveDocs, reuse); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { return termsEnum.docsAndPositions(liveDocs, reuse); } Index: lucene/src/java/org/apache/lucene/index/DocsAndFreqsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/DocsAndFreqsEnum.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/DocsAndFreqsEnum.java (working copy) @@ -0,0 +1,52 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + + +/** Iterates through the documents and term freqs. + * NOTE: you must first call {@link #nextDoc} before using + * any of the per-doc methods (this does not apply to the + * bulk read {@link #read} method). + * + * @lucene.experimental */ +public abstract class DocsAndFreqsEnum extends DocsEnum { + + /** Returns term frequency in the current document. Do + * not call this before {@link #nextDoc} is first called, + * nor after {@link #nextDoc} returns NO_MORE_DOCS. */ + public abstract int freq(); + + public int readDocsAndFreqs() throws IOException { + int count = 0; + final int[] docs = bulkResult.docs.ints; + final int[] freqs = bulkResult.freqs.ints; + while(count < docs.length) { + final int doc = nextDoc(); + if (doc != NO_MORE_DOCS) { + docs[count] = doc; + freqs[count] = freq(); + count++; + } else { + break; + } + } + return count; + } +} Property changes on: lucene/src/java/org/apache/lucene/index/DocsAndFreqsEnum.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native Index: lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java (working copy) +++ lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java (working copy) @@ -68,11 +68,6 @@ } @Override - public int freq() { - return current.freq(); - } - - @Override public int docID() { return doc; } Index: lucene/src/java/org/apache/lucene/index/CheckIndex.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/CheckIndex.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/CheckIndex.java (working copy) @@ -684,6 +684,7 @@ } DocsEnum docs = null; + DocsAndFreqsEnum docsAndFreqs = null; DocsAndPositionsEnum postings = null; final FieldsEnum fieldsEnum = fields.iterator(); @@ -742,6 +743,7 @@ sumDocFreq += docFreq; docs = termsEnum.docs(liveDocs, docs); + docsAndFreqs = termsEnum.docsAndFreqs(liveDocs, docsAndFreqs); postings = termsEnum.docsAndPositions(liveDocs, postings); if (hasOrd) { @@ -763,13 +765,24 @@ status.termCount++; final DocsEnum docs2; + final DocsAndFreqsEnum docsAndFreqs2; final boolean hasPositions; + final boolean hasFreqs; if (postings != null) { docs2 = postings; + docsAndFreqs2 = postings; hasPositions = true; + hasFreqs = true; + } else if (docsAndFreqs != null) { + docs2 = docsAndFreqs; + docsAndFreqs2 = docsAndFreqs; + hasPositions = false; + hasFreqs = true; } else { docs2 = docs; + docsAndFreqs2 = null; hasPositions = false; + hasFreqs = false; } int lastDoc = -1; @@ -781,9 +794,15 @@ break; } visitedDocs.set(doc); - final int freq = docs2.freq(); - status.totPos += freq; - totalTermFreq += freq; + int freq = -1; + if (hasFreqs) { + freq = docsAndFreqs2.freq(); + if (freq <= 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } + status.totPos += freq; + totalTermFreq += freq; + } docCount++; if (doc <= lastDoc) { @@ -794,12 +813,9 @@ } lastDoc = doc; - if (freq <= 0) { - throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); - } int lastPos = -1; - if (postings != null) { + if (hasPositions) { for(int j=0;jNOTE: the default impl simply delegates to {@link @@ -86,12 +81,10 @@ public int read() throws IOException { int count = 0; final int[] docs = bulkResult.docs.ints; - final int[] freqs = bulkResult.freqs.ints; 
while(count < docs.length) { final int doc = nextDoc(); if (doc != NO_MORE_DOCS) { docs[count] = doc; - freqs[count] = freq(); count++; } else { break; Index: lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/FilteredTermsEnum.java (working copy) @@ -169,6 +169,11 @@ public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException { return tenum.docs(bits, reuse); } + + @Override + public DocsAndFreqsEnum docsAndFreqs(Bits bits, DocsAndFreqsEnum reuse) throws IOException { + return tenum.docsAndFreqs(bits, reuse); + } @Override public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse) throws IOException { Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) @@ -175,6 +175,11 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + return in.docsAndFreqs(liveDocs, reuse); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { return in.docsAndPositions(liveDocs, reuse); } @@ -209,11 +214,6 @@ } @Override - public int freq() { - return in.freq(); - } - - @Override public int nextDoc() throws IOException { return in.nextDoc(); } Index: lucene/src/java/org/apache/lucene/index/TermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/TermsEnum.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/TermsEnum.java (working copy) @@ -154,6 +154,16 @@ * @param reuse pass a prior 
DocsEnum for possible reuse */ public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException; + /** Get {@link DocsAndFreqsEnum} for the current term. Do not + * call this when the enum is unpositioned. This method + * will only return null if freqs were not indexed for + * this field. + * + * @param liveDocs unset bits are documents that should not + * be returned + * @param reuse pass a prior DocsAndFreqsEnum for possible reuse */ + public abstract DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException; + /** Get {@link DocsAndPositionsEnum} for the current term. * Do not call this when the enum is unpositioned. * This method will only return null if positions were @@ -232,6 +242,11 @@ public DocsEnum docs(Bits liveDocs, DocsEnum reuse) { throw new IllegalStateException("this method should never be called"); } + + @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) { + throw new IllegalStateException("this method should never be called"); + } @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) { Index: lucene/src/java/org/apache/lucene/index/FieldInfo.java =================================================================== --- lucene/src/java/org/apache/lucene/index/FieldInfo.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/FieldInfo.java (working copy) @@ -44,6 +44,7 @@ */ public static enum IndexOptions { /** only documents are indexed: term frequencies and positions are omitted */ + // TODO: maybe rename to just DOCS? 
DOCS_ONLY, /** only documents and term frequencies are indexed: positions are omitted */ DOCS_AND_FREQS, Index: lucene/src/java/org/apache/lucene/index/IndexReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) @@ -1049,10 +1049,30 @@ return null; } + /** Returns {@link DocsAndFreqsEnum} for the specified field & + * term. This may return null, if either the field or + * term does not exist, or if term frequencies were not + * indexed for this field. */ + public DocsAndFreqsEnum termDocsAndFreqsEnum(Bits liveDocs, String field, BytesRef term) throws IOException { + assert field != null; + assert term != null; + final Fields fields = fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docsAndFreqs(liveDocs, null); + } + } + } + return null; + } + /** Returns {@link DocsAndPositionsEnum} for the specified * field & term. This may return null, if either the * field or term does not exist, or, positions were not - * stored for this term. */ + * indexed for this field. */ public DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException { assert field != null; assert term != null; @@ -1072,7 +1092,7 @@ /** * Returns {@link DocsEnum} for the specified field and * {@link TermState}. 
This may return null, if either the field or the term - * does not exists or the {@link TermState} is invalid for the underlying + * does not exist or the {@link TermState} is invalid for the underlying * implementation.*/ public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException { assert state != null; @@ -1090,6 +1110,27 @@ } /** + * Returns {@link DocsAndFreqsEnum} for the specified field and + * {@link TermState}. This may return null, if either the field or the term + * does not exist, or term frequencies were not indexed + * for this field, or the {@link TermState} is invalid for the underlying + * implementation.*/ + public DocsAndFreqsEnum termDocsAndFreqsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException { + assert state != null; + assert field != null; + final Fields fields = fields(); + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + termsEnum.seekExact(term, state); + return termsEnum.docsAndFreqs(liveDocs, null); + } + } + return null; + } + + /** * Returns {@link DocsAndPositionsEnum} for the specified field and * {@link TermState}. 
This may return null, if either the field or the term * does not exists, the {@link TermState} is invalid for the underlying @@ -1162,7 +1203,9 @@ MultiFields.getLiveDocs(this), term.field(), term.bytes()); - if (docs == null) return 0; + if (docs == null) { + return 0; + } int n = 0; int doc; while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) { Index: lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (working copy) @@ -40,6 +40,7 @@ private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field private final TermsEnumWithSlice[] top; private final MultiDocsEnum.EnumWithSlice[] subDocs; + private final MultiDocsAndFreqsEnum.EnumWithSlice[] subDocsAndFreqs; private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions; private BytesRef lastSeek; @@ -76,10 +77,13 @@ subs = new TermsEnumWithSlice[slices.length]; subDocs = new MultiDocsEnum.EnumWithSlice[slices.length]; subDocsAndPositions = new MultiDocsAndPositionsEnum.EnumWithSlice[slices.length]; + subDocsAndFreqs = new MultiDocsAndFreqsEnum.EnumWithSlice[slices.length]; for(int i=0;i>> 1; @@ -371,6 +371,7 @@ @Override public int freq() { + assert indexOptions != IndexOptions.DOCS_ONLY; return freq; } } @@ -603,6 +604,7 @@ public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException { decodeMetaData(); FSTDocsEnum docsEnum; + if (reuse == null || !(reuse instanceof FSTDocsEnum)) { docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads); } else { @@ -615,6 +617,24 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + decodeMetaData(); + FSTDocsEnum docsEnum; + + if (field.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } else if (reuse == 
null || !(reuse instanceof FSTDocsEnum)) { + docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads); + } else { + docsEnum = (FSTDocsEnum) reuse; + if (!docsEnum.canReuse(field.indexOptions, field.storePayloads)) { + docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads); + } + } + return docsEnum.reset(current.output, liveDocs, docFreq); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { if (field.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return null; Index: lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.MergeState; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.MultiDocsEnum; +import org.apache.lucene.index.MultiDocsAndFreqsEnum; import org.apache.lucene.index.MultiDocsAndPositionsEnum; import org.apache.lucene.util.BytesRef; @@ -51,8 +52,9 @@ public abstract Comparator getComparator() throws IOException; /** Default merge impl */ - private MappingMultiDocsEnum docsEnum = null; - private MappingMultiDocsAndPositionsEnum postingsEnum = null; + private MappingMultiDocsEnum docsEnum; + private MappingMultiDocsAndFreqsEnum docsAndFreqsEnum; + private MappingMultiDocsAndPositionsEnum postingsEnum; public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException { @@ -63,7 +65,7 @@ long sumDFsinceLastAbortCheck = 0; FixedBitSet visitedDocs = new FixedBitSet(mergeState.mergedDocCount); - if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { if (docsEnum == null) { docsEnum = new MappingMultiDocsEnum(); } @@ 
-81,7 +83,7 @@ final TermStats stats = postingsConsumer.merge(mergeState, docsEnum, visitedDocs); if (stats.docFreq > 0) { finishTerm(term, stats); - sumTotalTermFreq += stats.totalTermFreq; + sumTotalTermFreq += stats.docFreq; sumDFsinceLastAbortCheck += stats.docFreq; sumDocFreq += stats.docFreq; if (sumDFsinceLastAbortCheck > 60000) { @@ -91,7 +93,35 @@ } } } + } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) { + if (docsAndFreqsEnum == null) { + docsAndFreqsEnum = new MappingMultiDocsAndFreqsEnum(); + } + docsAndFreqsEnum.setMergeState(mergeState); + + MultiDocsAndFreqsEnum docsAndFreqsEnumIn = null; + + while((term = termsEnum.next()) != null) { + // We can pass null for liveDocs, because the + // mapping enum will skip the non-live docs: + docsAndFreqsEnumIn = (MultiDocsAndFreqsEnum) termsEnum.docsAndFreqs(null, docsAndFreqsEnumIn); + assert docsAndFreqsEnumIn != null; + docsAndFreqsEnum.reset(docsAndFreqsEnumIn); + final PostingsConsumer postingsConsumer = startTerm(term); + final TermStats stats = postingsConsumer.merge(mergeState, docsAndFreqsEnum, visitedDocs); + if (stats.docFreq > 0) { + finishTerm(term, stats); + sumTotalTermFreq += stats.totalTermFreq; + sumDFsinceLastAbortCheck += stats.docFreq; + sumDocFreq += stats.docFreq; + if (sumDFsinceLastAbortCheck > 60000) { + mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0); + sumDFsinceLastAbortCheck = 0; + } + } + } } else { + assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; if (postingsEnum == null) { postingsEnum = new MappingMultiDocsAndPositionsEnum(); } @@ -101,27 +131,26 @@ // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn); - if (postingsEnumIn != null) { - postingsEnum.reset(postingsEnumIn); - // set PayloadProcessor - if (mergeState.payloadProcessorProvider != null) { - for (int i = 0; i < 
mergeState.readers.size(); i++) { - if (mergeState.dirPayloadProcessor[i] != null) { - mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term); - } + assert postingsEnumIn != null; + postingsEnum.reset(postingsEnumIn); + // set PayloadProcessor + if (mergeState.payloadProcessorProvider != null) { + for (int i = 0; i < mergeState.readers.size(); i++) { + if (mergeState.dirPayloadProcessor[i] != null) { + mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term); } } - final PostingsConsumer postingsConsumer = startTerm(term); - final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs); - if (stats.docFreq > 0) { - finishTerm(term, stats); - sumTotalTermFreq += stats.totalTermFreq; - sumDFsinceLastAbortCheck += stats.docFreq; - sumDocFreq += stats.docFreq; - if (sumDFsinceLastAbortCheck > 60000) { - mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0); - sumDFsinceLastAbortCheck = 0; - } + } + final PostingsConsumer postingsConsumer = startTerm(term); + final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs); + if (stats.docFreq > 0) { + finishTerm(term, stats); + sumTotalTermFreq += stats.totalTermFreq; + sumDFsinceLastAbortCheck += stats.docFreq; + sumDocFreq += stats.docFreq; + if (sumDFsinceLastAbortCheck > 60000) { + mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0); + sumDFsinceLastAbortCheck = 0; } } } Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (working copy) @@ -23,10 +23,11 @@ import java.util.Iterator; import java.util.TreeMap; +import org.apache.lucene.index.DocsAndFreqsEnum; import 
org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexFileNames; @@ -693,13 +694,22 @@ //System.out.println("BTR.docs this=" + this); decodeMetaData(); //System.out.println("BTR.docs: state.docFreq=" + state.docFreq); - final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, liveDocs, reuse); - assert docsEnum != null; - //System.out.println("BTR.docs: return docsEnum=" + docsEnum); - return docsEnum; + return postingsReader.docs(fieldInfo, state, liveDocs, reuse); } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + //System.out.println("BTR.docs this=" + this); + //System.out.println("BTR.docs: state.docFreq=" + state.docFreq); + if (fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } else { + decodeMetaData(); + return postingsReader.docsAndFreqs(fieldInfo, state, liveDocs, reuse); + } + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { //System.out.println("BTR.d&p this=" + this); decodeMetaData(); Index: lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReader.java (working copy) @@ -21,13 +21,14 @@ import java.util.IdentityHashMap; import java.util.Map; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import 
org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.TermState; +import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; @@ -212,6 +213,43 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + if (field.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } + PulsingTermState termState = (PulsingTermState) _termState; + if (termState.postingsSize != -1) { + PulsingDocsEnum postings; + if (reuse instanceof PulsingDocsEnum) { + postings = (PulsingDocsEnum) reuse; + if (!postings.canReuse(field)) { + postings = new PulsingDocsEnum(field); + } + } else { + // the 'reuse' is actually the wrapped enum + PulsingDocsEnum previous = (PulsingDocsEnum) getOther(reuse); + if (previous != null && previous.canReuse(field)) { + postings = previous; + } else { + postings = new PulsingDocsEnum(field); + } + } + if (reuse != postings) { + setOther(postings, reuse); // postings.other = reuse + } + return postings.reset(liveDocs, termState); + } else { + if (reuse instanceof PulsingDocsEnum) { + DocsAndFreqsEnum wrapped = wrappedPostingsReader.docsAndFreqs(field, termState.wrappedTermState, liveDocs, (DocsAndFreqsEnum) getOther(reuse)); + setOther(wrapped, reuse); // wrapped.other = reuse + return wrapped; + } else { + return wrappedPostingsReader.docsAndFreqs(field, termState.wrappedTermState, liveDocs, reuse); + } + } + } + + @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { if (field.indexOptions 
!= IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return null; @@ -251,7 +289,7 @@ } } - private static class PulsingDocsEnum extends DocsEnum { + private static class PulsingDocsEnum extends DocsAndFreqsEnum { private byte[] postingsBytes; private final ByteArrayDataInput postings = new ByteArrayDataInput(); private final IndexOptions indexOptions; @@ -283,7 +321,6 @@ docID = -1; accum = 0; payloadLength = 0; - freq = 1; this.liveDocs = liveDocs; return this; } @@ -342,6 +379,7 @@ @Override public int freq() { + assert indexOptions != IndexOptions.DOCS_ONLY; return freq; } Index: lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/lucene40/Lucene40PostingsReader.java (working copy) @@ -20,15 +20,16 @@ import java.io.IOException; import java.util.Collection; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.TermState; +import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.index.codecs.PostingsReaderBase; -import org.apache.lucene.index.codecs.BlockTermState; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; @@ -226,6 +227,26 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + SegmentDocsEnum docsEnum; + if (fieldInfo.indexOptions == 
IndexOptions.DOCS_ONLY) { + return null; + } else if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { + docsEnum = new SegmentDocsEnum(freqIn); + } else { + docsEnum = (SegmentDocsEnum) reuse; + if (docsEnum.startFreqIn != freqIn) { + // If you are using ParallelReader, and pass in a + // reused DocsEnum, it could have come from another + // reader also using standard codec + docsEnum = new SegmentDocsEnum(freqIn); + } + } + // if (DEBUG) System.out.println("SPR.docs ts=" + termState); + return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs); + } + + @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return null; @@ -263,12 +284,12 @@ } } - // Decodes only docs - private class SegmentDocsEnum extends DocsEnum { + // Decodes only docs + freqs + private class SegmentDocsEnum extends DocsAndFreqsEnum { final IndexInput freqIn; final IndexInput startFreqIn; - boolean omitTF; // does current field omit term freq? + boolean indexOmitsTF; // does current field omit term freq? boolean storePayloads; // does current field store payloads? 
int limit; // number of docs in this posting @@ -291,10 +312,7 @@ } public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException { - omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY; - if (omitTF) { - freq = 1; - } + indexOmitsTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY; storePayloads = fieldInfo.storePayloads; this.liveDocs = liveDocs; freqOffset = termState.freqOffset; @@ -330,7 +348,7 @@ // Decode next doc/freq pair final int code = freqIn.readVInt(); // if (DEBUG) System.out.println(" code=" + code); - if (omitTF) { + if (indexOmitsTF) { accum += code; } else { accum += code >>> 1; // shift off low bit @@ -338,6 +356,7 @@ freq = 1; // freq is one } else { freq = freqIn.readVInt(); // else read freq + assert freq > 0; } } @@ -361,7 +380,7 @@ ord++; // manually inlined call to next() for speed final int code = freqIn.readVInt(); - if (omitTF) { + if (indexOmitsTF) { accum += code; } else { accum += code >>> 1; // shift off low bit @@ -389,6 +408,7 @@ @Override public int freq() { + assert !indexOmitsTF; return freq; } Index: lucene/src/java/org/apache/lucene/index/codecs/MappingMultiDocsEnum.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/MappingMultiDocsEnum.java (working copy) +++ lucene/src/java/org/apache/lucene/index/codecs/MappingMultiDocsEnum.java (working copy) @@ -52,11 +52,6 @@ } @Override - public int freq() { - return current.freq(); - } - - @Override public int docID() { return doc; } Index: lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/DefaultTermVectorsReader.java (working copy) @@ -26,6 +26,7 @@ import 
org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; @@ -515,6 +516,18 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + TVDocsEnum docsEnum; + if (reuse != null && reuse instanceof TVDocsEnum) { + docsEnum = (TVDocsEnum) reuse; + } else { + docsEnum = new TVDocsEnum(); + } + docsEnum.reset(liveDocs, freq); + return docsEnum; + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { if (!storePositions && !storeOffsets) { return null; @@ -545,7 +558,7 @@ // NOTE: sort of a silly class, since you can get the // freq() already by TermsEnum.totalTermFreq - private static class TVDocsEnum extends DocsEnum { + private static class TVDocsEnum extends DocsAndFreqsEnum { private boolean didNext; private int doc = -1; private int freq; Index: lucene/src/java/org/apache/lucene/index/codecs/lucene3x/SegmentTermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/lucene3x/SegmentTermDocs.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/lucene3x/SegmentTermDocs.java (working copy) @@ -112,8 +112,12 @@ } public final int doc() { return doc; } - public final int freq() { return freq; } + public final int freq() { + assert indexOptions != IndexOptions.DOCS_ONLY; + return freq; + } + protected void skippingDoc() throws IOException { } @@ -125,7 +129,6 @@ if (indexOptions == IndexOptions.DOCS_ONLY) { doc += docCode; - freq = 1; } else { doc += docCode >>> 1; // shift off low bit if ((docCode & 1) != 0) // if low bit is set Index: lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java 
=================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/lucene3x/Lucene3xFields.java (working copy) @@ -25,10 +25,11 @@ import java.util.Map; import java.util.TreeMap; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.IndexFileNames; @@ -964,6 +965,22 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + PreDocsEnum docsEnum; + if (fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } else if (reuse == null || !(reuse instanceof PreDocsEnum)) { + docsEnum = new PreDocsEnum(); + } else { + docsEnum = (PreDocsEnum) reuse; + if (docsEnum.getFreqStream() != freqStream) { + docsEnum = new PreDocsEnum(); + } + } + return docsEnum.reset(termEnum, liveDocs); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { PreDocsAndPositionsEnum docsPosEnum; if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { @@ -980,7 +997,7 @@ } } - private final class PreDocsEnum extends DocsEnum { + private final class PreDocsEnum extends DocsAndFreqsEnum { final private SegmentTermDocs docs; private int docID = -1; PreDocsEnum() throws IOException { Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java (revision 1203747) +++ 
lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReader.java (working copy) @@ -20,10 +20,11 @@ import java.io.IOException; import java.util.Collection; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.TermState; @@ -291,6 +292,28 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + if (fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } + final SepTermState termState = (SepTermState) _termState; + SepDocsEnum docsEnum; + if (reuse == null || !(reuse instanceof SepDocsEnum)) { + docsEnum = new SepDocsEnum(); + } else { + docsEnum = (SepDocsEnum) reuse; + if (docsEnum.startDocIn != docIn) { + // If you are using ParallelReader, and pass in a + // reused DocsAndFreqsEnum, it could have come + // from another reader also using sep codec + docsEnum = new SepDocsEnum(); + } + } + + return docsEnum.init(fieldInfo, termState, liveDocs); + } + + @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { assert fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; final SepTermState termState = (SepTermState) _termState; @@ -310,7 +333,7 @@ return postingsEnum.init(fieldInfo, termState, liveDocs); } - class SepDocsEnum extends DocsEnum { + class SepDocsEnum extends DocsAndFreqsEnum { int docFreq; int doc = -1; int accum; @@ -369,8 +392,6 @@ if (!omitTF) { freqIndex.set(termState.freqIndex); freqIndex.seek(freqReader); - } else { - freq = 1; } 
docFreq = termState.docFreq; @@ -440,6 +461,7 @@ @Override public int freq() { + assert !omitTF; return freq; } Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (working copy) @@ -17,33 +17,34 @@ * limitations under the License. */ -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.codecs.FieldsProducer; +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.index.DocsAndFreqsEnum; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.FieldsEnum; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.codecs.FieldsProducer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.fst.Builder; import org.apache.lucene.util.fst.BytesRefFSTEnum; import org.apache.lucene.util.fst.FST; +import org.apache.lucene.util.fst.PairOutputs; import org.apache.lucene.util.fst.PositiveIntOutputs; -import org.apache.lucene.util.fst.PairOutputs; -import 
java.io.IOException; -import java.util.Comparator; -import java.util.Map; -import java.util.HashMap; - class SimpleTextFieldsReader extends FieldsProducer { private final IndexInput in; @@ -196,10 +197,24 @@ } else { docsEnum = new SimpleTextDocsEnum(); } - return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY); + return docsEnum.reset(docsStart, liveDocs, true); } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + if (indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } + SimpleTextDocsEnum docsEnum; + if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) { + docsEnum = (SimpleTextDocsEnum) reuse; + } else { + docsEnum = new SimpleTextDocsEnum(); + } + return docsEnum.reset(docsStart, liveDocs, false); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return null; @@ -220,7 +235,7 @@ } } - private class SimpleTextDocsEnum extends DocsEnum { + private class SimpleTextDocsEnum extends DocsAndFreqsEnum { private final IndexInput inStart; private final IndexInput in; private boolean omitTF; @@ -244,9 +259,6 @@ in.seek(fp); this.omitTF = omitTF; docID = -1; - if (omitTF) { - tf = 1; - } return this; } @@ -257,6 +269,7 @@ @Override public int freq() { + assert !omitTF; return tf; } Index: lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextTermVectorsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextTermVectorsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextTermVectorsReader.java (working copy) @@ -27,6 +27,7 @@ import java.util.TreeMap; import 
org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfos; @@ -367,6 +368,14 @@ public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException { // TODO: reuse SimpleTVDocsEnum e = new SimpleTVDocsEnum(); + e.reset(liveDocs, -1); + return e; + } + + @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + // TODO: reuse + SimpleTVDocsEnum e = new SimpleTVDocsEnum(); e.reset(liveDocs, current.getValue().freq); return e; } @@ -390,7 +399,7 @@ } // note: these two enum classes are exactly like the Default impl... - private static class SimpleTVDocsEnum extends DocsEnum { + private static class SimpleTVDocsEnum extends DocsAndFreqsEnum { private boolean didNext; private int doc = -1; private int freq; Index: lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java (working copy) @@ -19,10 +19,11 @@ import java.io.IOException; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -63,14 +64,27 @@ int df = 0; long totTF = 0; - if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { while(true) { final int doc = 
postings.nextDoc(); if (doc == DocIdSetIterator.NO_MORE_DOCS) { break; } visitedDocs.set(doc); - final int freq = postings.freq(); + this.startDoc(doc, 0); + this.finishDoc(); + df++; + } + totTF = -1; + } else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) { + final DocsAndFreqsEnum postingsEnum = (DocsAndFreqsEnum) postings; + while(true) { + final int doc = postingsEnum.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + visitedDocs.set(doc); + final int freq = postingsEnum.freq(); this.startDoc(doc, freq); this.finishDoc(); df++; Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (working copy) @@ -25,6 +25,7 @@ import java.util.Iterator; import java.util.TreeMap; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; @@ -880,6 +881,16 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits skipDocs, DocsAndFreqsEnum reuse) throws IOException { + if (fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } else { + currentFrame.decodeMetaData(); + return postingsReader.docsAndFreqs(fieldInfo, currentFrame.termState, skipDocs, reuse); + } + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { if (fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return null; @@ -2090,10 +2101,24 @@ //if (DEBUG) { //System.out.println(" state=" + currentFrame.state); //} - final DocsEnum docsEnum = postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse); + return postingsReader.docs(fieldInfo, 
currentFrame.state, skipDocs, reuse); + } - assert docsEnum != null; - return docsEnum; + @Override + public DocsAndFreqsEnum docsAndFreqs(Bits skipDocs, DocsAndFreqsEnum reuse) throws IOException { + assert !eof; + //if (DEBUG) { + //System.out.println("BTTR.docs seg=" + segment); + //} + //if (DEBUG) { + //System.out.println(" state=" + currentFrame.state); + //} + if (fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) { + return null; + } else { + currentFrame.decodeMetaData(); + return postingsReader.docsAndFreqs(fieldInfo, currentFrame.state, skipDocs, reuse); + } } @Override Index: lucene/src/java/org/apache/lucene/index/MultiFields.java =================================================================== --- lucene/src/java/org/apache/lucene/index/MultiFields.java (revision 1203747) +++ lucene/src/java/org/apache/lucene/index/MultiFields.java (working copy) @@ -164,6 +164,22 @@ return null; } + /** Returns {@link DocsAndFreqsEnum} for the specified field & + * term. This may return null if the term does not + * exist or freqs were not indexed. */ + public static DocsAndFreqsEnum getTermDocsAndFreqsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException { + assert field != null; + assert term != null; + final Terms terms = getTerms(r, field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docsAndFreqs(liveDocs, null); + } + } + return null; + } + /** Returns {@link DocsAndPositionsEnum} for the specified * field & term. This may return null if the term does * not exist or positions were not indexed.
*/ Index: lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java =================================================================== --- lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java (revision 1203747) +++ lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java (working copy) @@ -29,6 +29,7 @@ import java.util.TreeMap; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; @@ -388,12 +389,17 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) { + return new RAMDocsEnum(ramField.termToDocs.get(current), liveDocs); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) { return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), liveDocs); } } - private static class RAMDocsEnum extends DocsEnum { + private static class RAMDocsEnum extends DocsAndFreqsEnum { private final RAMTerm ramTerm; private final Bits liveDocs; private RAMDoc current; Index: lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java =================================================================== --- lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java (revision 1203747) +++ lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java (working copy) @@ -37,12 +37,19 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.DocsAndFreqsEnum; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PostingsFormat; @@ -520,4 +527,88 @@ return doc2; } + + // Returns a DocsEnum, but randomly sometimes uses a + // DocsAndFreqsEnum, DocsAndPositionsEnum. Returns null + // if field/term doesn't exist: + public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse) throws IOException { + final Terms terms = MultiFields.getTerms(r, field); + if (terms == null) { + return null; + } + final TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(term, random.nextBoolean())) { + return null; + } + if (random.nextBoolean()) { + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...? + final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + // TODO: cast re-use to D&FE if we can...? + final DocsAndFreqsEnum docsAndFreqs = termsEnum.docsAndFreqs(liveDocs, null); + if (docsAndFreqs != null) { + return docsAndFreqs; + } + } + return termsEnum.docs(liveDocs, reuse); + } + + // Returns a DocsEnum from a positioned TermsEnum, but + // randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum (reuse is only passed to the plain docs() fallback). + public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse) throws IOException { + if (random.nextBoolean()) { + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...?
+ final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + // TODO: cast re-use to D&FE if we can...? + final DocsAndFreqsEnum docsAndFreqs = termsEnum.docsAndFreqs(liveDocs, null); + if (docsAndFreqs != null) { + return docsAndFreqs; + } + } + return termsEnum.docs(liveDocs, reuse); + } + + // Returns a DocsAndFreqsEnum, but randomly sometimes uses a + // DocsAndPositionsEnum. Returns null if field/term + // doesn't exist or no freq info was indexed: + public static DocsAndFreqsEnum docsAndFreqs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + final Terms terms = MultiFields.getTerms(r, field); + if (terms == null) { + return null; + } + final TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(term, random.nextBoolean())) { + return null; + } + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...? + final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + return termsEnum.docsAndFreqs(liveDocs, reuse); + } + + // Returns a DocsAndFreqsEnum from a positioned TermsEnum, + // but randomly sometimes uses a DocsAndPositionsEnum. + public static DocsAndFreqsEnum docsAndFreqs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsAndFreqsEnum reuse) throws IOException { + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...?
+ final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + return termsEnum.docsAndFreqs(liveDocs, reuse); + } } Index: lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java =================================================================== --- lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java (revision 1203747) +++ lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java (working copy) @@ -24,16 +24,17 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum.SeekStatus; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; @@ -139,7 +140,7 @@ assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy"))); assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog"))); assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the"))); - DocsEnum de = te.docs(null, null); + DocsAndFreqsEnum de = te.docsAndFreqs(null, null); assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS); assertEquals(2, de.freq()); assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS); Index:
lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java (revision 1203747) +++ lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java (working copy) @@ -128,13 +128,20 @@ Terms terms = fields.terms(field); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); + DocsAndFreqsEnum docsAndFreqs = null; DocsEnum docs = null; while(termsEnum.next() != null) { - docs = termsEnum.docs(liveDocs, docs); + docsAndFreqs = termsEnum.docsAndFreqs(liveDocs, docsAndFreqs); + DocsEnum docs2; + if (docsAndFreqs != null) { + docs2 = docsAndFreqs; + } else { + docs2 = docs = termsEnum.docs(liveDocs, docs); + } while(true) { - int docID = docs.nextDoc(); + int docID = docs2.nextDoc(); if (docID != docs.NO_MORE_DOCS) { - termCounts[docID] += docs.freq(); + termCounts[docID] += docsAndFreqs == null ? 1 : docsAndFreqs.freq(); } else { break; } Index: lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java =================================================================== --- lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 1203747) +++ lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocsAndFreqsEnum; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInvertState; @@ -46,9 +47,9 @@ import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.codecs.PerDocValues; 
import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -944,6 +945,14 @@ } @Override + public DocsAndFreqsEnum docsAndFreqs(Bits liveDocs, DocsAndFreqsEnum reuse) { + if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { + reuse = new MemoryDocsEnum(); + } + return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue()); + } + + @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) { if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) { reuse = new MemoryDocsAndPositionsEnum(); @@ -970,12 +979,12 @@ } } - private class MemoryDocsEnum extends DocsEnum { + private class MemoryDocsEnum extends DocsAndFreqsEnum { private ArrayIntList positions; private boolean hasNext; private Bits liveDocs; - public DocsEnum reset(Bits liveDocs, ArrayIntList positions) { + public DocsAndFreqsEnum reset(Bits liveDocs, ArrayIntList positions) { this.liveDocs = liveDocs; this.positions = positions; hasNext = true;