Specialize the standard codec's DocsEnum by whether the field stores term freqs.

StandardPostingsReader.docs() now returns SegmentDocsEnum only for fields with
omitTermFreqAndPositions set, and the new SegmentDocsAndFreqsEnum otherwise, so
neither enum tests omitTF per document. SegmentDocsEnum.freq() always returns 1
and its bulk freqs buffer is pre-filled with 1; to allow that override,
DocsEnum.initBulkResult() is no longer final. SegmentDocsAndFreqsEnum.advance()
decodes postings inline and skips the freq VInt of docs before the target.

Index: lucene/src/java/org/apache/lucene/index/DocsEnum.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/DocsEnum.java	(revision 1035388)
+++ lucene/src/java/org/apache/lucene/index/DocsEnum.java	(working copy)
@@ -54,7 +54,7 @@
 
   protected BulkReadResult bulkResult;
 
-  protected final void initBulkResult() {
+  protected void initBulkResult() {
     if (bulkResult == null) {
       bulkResult = new BulkReadResult();
       bulkResult.docs.ints = new int[64];
Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java	(revision 1035473)
+++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java	(working copy)
@@ -18,6 +18,7 @@
   */
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 
 import org.apache.lucene.store.Directory;
@@ -155,19 +156,35 @@
 
   @Override
   public DocsEnum docs(FieldInfo fieldInfo, TermState termState, Bits skipDocs, DocsEnum reuse) throws IOException {
-    SegmentDocsEnum docsEnum;
-    if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
-      docsEnum = new SegmentDocsEnum(freqIn);
-    } else {
-      docsEnum = (SegmentDocsEnum) reuse;
-      if (docsEnum.startFreqIn != freqIn) {
-        // If you are using ParellelReader, and pass in a
-        // reused DocsEnum, it could have come from another
-        // reader also using standard codec
+    if (fieldInfo.omitTermFreqAndPositions) {
+      SegmentDocsEnum docsEnum;
+      if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
         docsEnum = new SegmentDocsEnum(freqIn);
+      } else {
+        docsEnum = (SegmentDocsEnum) reuse;
+        if (docsEnum.startFreqIn != freqIn) {
+          // If you are using ParallelReader, and pass in a
+          // reused DocsEnum, it could have come from another
+          // reader also using standard codec
+          docsEnum = new SegmentDocsEnum(freqIn);
+        }
       }
+      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
+    } else {
+      SegmentDocsAndFreqsEnum docsEnum;
+      if (reuse == null || !(reuse instanceof SegmentDocsAndFreqsEnum)) {
+        docsEnum = new SegmentDocsAndFreqsEnum(freqIn);
+      } else {
+        docsEnum = (SegmentDocsAndFreqsEnum) reuse;
+        if (docsEnum.startFreqIn != freqIn) {
+          // If you are using ParallelReader, and pass in a
+          // reused DocsEnum, it could have come from another
+          // reader also using standard codec
+          docsEnum = new SegmentDocsAndFreqsEnum(freqIn);
+        }
+      }
+      return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
     }
-    return docsEnum.reset(fieldInfo, (DocTermState) termState, skipDocs);
   }
 
   @Override
@@ -208,18 +225,16 @@
     }
   }
 
-  // Decodes only docs
+  // Decodes only docs (omitTF)
   private class SegmentDocsEnum extends DocsEnum {
     final IndexInput freqIn;
     final IndexInput startFreqIn;
 
-    boolean omitTF;                               // does current field omit term freq?
     boolean storePayloads;                        // does current field store payloads?
 
     int limit;                                    // number of docs in this posting
     int ord;                                      // how many docs we've read
     int doc;                                      // doc we last read
-    int freq;                                     // freq we last read
 
     Bits skipDocs;
 
@@ -234,11 +249,172 @@
       this.freqIn = (IndexInput) freqIn.clone();
     }
 
+    @Override
+    protected void initBulkResult() {
+      super.initBulkResult();
+      // Freqs are omitted for this field, so every bulk-read freq is 1.
+      Arrays.fill(bulkResult.freqs.ints, 1);
+    }
+
     public SegmentDocsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
-      omitTF = fieldInfo.omitTermFreqAndPositions;
-      if (omitTF) {
-        freq = 1;
+      assert fieldInfo.omitTermFreqAndPositions;
+
+      storePayloads = fieldInfo.storePayloads;
+      this.skipDocs = skipDocs;
+      freqOffset = termState.freqOffset;
+      skipOffset = termState.skipOffset;
+
+      // TODO: for full enum case (eg segment merging) this
+      // seek is unnecessary; maybe we can avoid in such
+      // cases
+      freqIn.seek(termState.freqOffset);
+      limit = termState.docFreq;
+      ord = 0;
+      doc = 0;
+
+      skipped = false;
+
+      return this;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      while(true) {
+        if (ord == limit) {
+          return doc = NO_MORE_DOCS;
+        }
+
+        ord++;
+
+        // Decode next doc/freq pair
+        doc += freqIn.readVInt();
+
+        if (skipDocs == null || !skipDocs.get(doc)) {
+          break;
+        }
       }
+
+      return doc;
+    }
+
+    @Override
+    public int read() throws IOException {
+
+      final int[] docs = bulkResult.docs.ints;
+      int i = 0;
+      final int length = docs.length;
+      while (i < length && ord < limit) {
+        ord++;
+        // manually inlined call to next() for speed
+        doc += freqIn.readVInt();
+
+        if (skipDocs == null || !skipDocs.get(doc)) {
+          docs[i] = doc;
+          ++i;
+        }
+      }
+
+      return i;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public int freq() {
+      return 1;
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+
+      // TODO: jump right to next() if target is < X away
+      // from where we are now?
+
+      if (skipOffset > 0) {
+
+        // There are enough docs in the posting to have
+        // skip data
+
+        if (skipper == null) {
+          // This is the first time this enum has ever been used for skipping -- do lazy init
+          skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
+        }
+
+        if (!skipped) {
+
+          // This is the first time this posting has
+          // skipped since reset() was called, so now we
+          // load the skip data for this posting
+
+          skipper.init(freqOffset + skipOffset,
+                       freqOffset, 0,
+                       limit, storePayloads);
+
+          skipped = true;
+        }
+
+        final int newOrd = skipper.skipTo(target);
+
+        if (newOrd > ord) {
+          // Skipper moved
+
+          ord = newOrd;
+          doc = skipper.getDoc();
+          freqIn.seek(skipper.getFreqPointer());
+        }
+      }
+
+      // scan for the rest:
+      do {
+        if (ord == limit) {
+          return doc = NO_MORE_DOCS;
+        }
+
+        ord++;
+
+        // Decode next doc/freq pair
+        doc += freqIn.readVInt();
+
+      } while (target > doc);
+
+      if (skipDocs != null && skipDocs.get(doc)) {
+        nextDoc();
+      }
+      return doc;
+    }
+  }
+
+  // Decodes docs and freqs
+  private class SegmentDocsAndFreqsEnum extends DocsEnum {
+    final IndexInput freqIn;
+    final IndexInput startFreqIn;
+
+    boolean storePayloads;                        // does current field store payloads?
+
+    int limit;                                    // number of docs in this posting
+    int ord;                                      // how many docs we've read
+    int doc;                                      // doc we last read
+    int freq;                                     // freq we last read
+
+    Bits skipDocs;
+
+    long freqOffset;
+    int skipOffset;
+
+    boolean skipped;
+    DefaultSkipListReader skipper;
+
+    public SegmentDocsAndFreqsEnum(IndexInput freqIn) throws IOException {
+      startFreqIn = freqIn;
+      this.freqIn = (IndexInput) freqIn.clone();
+    }
+
+    public SegmentDocsAndFreqsEnum reset(FieldInfo fieldInfo, DocTermState termState, Bits skipDocs) throws IOException {
+      assert !fieldInfo.omitTermFreqAndPositions;
+
       storePayloads = fieldInfo.storePayloads;
       this.skipDocs = skipDocs;
       freqOffset = termState.freqOffset;
@@ -268,15 +444,12 @@
 
         // Decode next doc/freq pair
         final int code = freqIn.readVInt();
-        if (omitTF) {
-          doc += code;
+
+        doc += code >>> 1;              // shift off low bit
+        if ((code & 1) != 0) {          // if low bit is set
+          freq = 1;                     // freq is one
         } else {
-          doc += code >>> 1;              // shift off low bit
-          if ((code & 1) != 0) {          // if low bit is set
-            freq = 1;                     // freq is one
-          } else {
-            freq = freqIn.readVInt();     // else read freq
-          }
+          freq = freqIn.readVInt();     // else read freq
         }
 
         if (skipDocs == null || !skipDocs.get(doc)) {
@@ -298,15 +471,12 @@
         ord++;
         // manually inlined call to next() for speed
         final int code = freqIn.readVInt();
-        if (omitTF) {
-          doc += code;
+
+        doc += code >>> 1;              // shift off low bit
+        if ((code & 1) != 0) {          // if low bit is set
+          freq = 1;                     // freq is one
         } else {
-          doc += code >>> 1;              // shift off low bit
-          if ((code & 1) != 0) {          // if low bit is set
-            freq = 1;                     // freq is one
-          } else {
-            freq = freqIn.readVInt();     // else read freq
-          }
+          freq = freqIn.readVInt();     // else read freq
         }
 
         if (skipDocs == null || !skipDocs.get(doc)) {
@@ -370,10 +540,36 @@
       }
 
       // scan for the rest:
-      do {
-        nextDoc();
-      } while (target > doc);
+      for (;;) {
+        if (ord == limit) {
+          return doc = NO_MORE_DOCS;
+        }
+
+        ord++;
+
+        // Decode next doc/freq pair
+        final int code = freqIn.readVInt();
+
+        doc += code >>> 1;              // shift off low bit
+        if (target > doc) {
+          if ((code & 1) == 0) {
+            while ((freqIn.readByte() & 0x80) != 0) // skipVint
+              ;
+          }
+        } else {
+          if ((code & 1) != 0) {          // if low bit is set
+            freq = 1;                     // freq is one
+          } else {
+            freq = freqIn.readVInt();     // else read freq
+          }
+          break;
+        }
+      }
 
+      if (skipDocs != null && skipDocs.get(doc)) {
+        nextDoc();
+      }
+
       return doc;
     }
   }