Index: lucene/src/java/org/apache/lucene/index/SegmentWriteState.java =================================================================== --- lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (working copy) @@ -53,20 +53,6 @@ * tweaking this is rarely useful.*/ public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC - /** Expert: The fraction of TermDocs entries stored in skip tables, - * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in - * smaller indexes, greater acceleration, but fewer accelerable cases, while - * smaller values result in bigger indexes, less acceleration and more - * accelerable cases. More detailed experiments would be useful here. */ - public final int skipInterval = 16; - - /** Expert: The maximum number of skip levels. Smaller values result in - * slightly smaller indexes, but slower skipping in big posting lists. 
- */ - public final int maxSkipLevels = 10; - - - public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos, int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes) { this.infoStream = infoStream; Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (working copy) @@ -23,6 +23,7 @@ import java.io.IOException; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -44,8 +45,22 @@ final IndexOutput freqOut; final IndexOutput proxOut; final DefaultSkipListWriter skipListWriter; - final int skipInterval; - final int maxSkipLevels; + /** Expert: The fraction of TermDocs entries stored in skip tables, + * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in + * smaller indexes, greater acceleration, but fewer accelerable cases, while + * smaller values result in bigger indexes, less acceleration and more + * accelerable cases. More detailed experiments would be useful here. */ + final int skipInterval = 16; + + /** + * Expert: minimum docFreq to write any skip data at all + */ + final int skipMinimum = skipInterval; + + /** Expert: The maximum number of skip levels. Smaller values result in + * slightly smaller indexes, but slower skipping in big posting lists. 
+ */ + final int maxSkipLevels = 10; final int totalNumDocs; IndexOutput termsOut; @@ -84,14 +99,11 @@ totalNumDocs = state.numDocs; - skipListWriter = new DefaultSkipListWriter(state.skipInterval, - state.maxSkipLevels, + skipListWriter = new DefaultSkipListWriter(skipInterval, + maxSkipLevels, state.numDocs, freqOut, proxOut); - - skipInterval = state.skipInterval; - maxSkipLevels = state.maxSkipLevels; } @Override @@ -100,6 +112,7 @@ CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); termsOut.writeInt(skipInterval); // write skipInterval termsOut.writeInt(maxSkipLevels); // write maxSkipLevels + termsOut.writeInt(skipMinimum); // write skipMinimum } @Override @@ -218,7 +231,7 @@ } lastFreqStart = freqStart; - if (df >= skipInterval) { + if (df >= skipMinimum) { bytesWriter.writeVInt((int) (skipListWriter.writeSkip(freqOut)-freqStart)); } Index: lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java (working copy) @@ -47,6 +47,7 @@ int skipInterval; int maxSkipLevels; + int skipMinimum; //private String segment; @@ -86,6 +87,7 @@ skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); + skipMinimum = termsIn.readInt(); } // Must keep final because we do non-standard clone @@ -179,7 +181,7 @@ //System.out.println(" freqFP=" + termState.freqOffset); assert termState.freqOffset < freqIn.length(); - if (termState.docFreq >= skipInterval) { + if (termState.docFreq >= skipMinimum) { termState.skipOffset = termState.bytesReader.readVInt(); //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length()); assert termState.freqOffset + termState.skipOffset < freqIn.length(); @@ -378,7 +380,7 @@ @Override public int advance(int 
target) throws IOException { - if ((target - skipInterval) >= doc && limit >= skipInterval) { + if ((target - skipInterval) >= doc && limit >= skipMinimum) { // There are enough docs in the posting to have // skip data, and it isn't too close. @@ -528,7 +530,7 @@ //System.out.println("StandardR.D&PE advance target=" + target); - if ((target - skipInterval) >= doc && limit >= skipInterval) { + if ((target - skipInterval) >= doc && limit >= skipMinimum) { // There are enough docs in the posting to have // skip data, and it isn't too close @@ -725,7 +727,7 @@ //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this); - if ((target - skipInterval) >= doc && limit >= skipInterval) { + if ((target - skipInterval) >= doc && limit >= skipMinimum) { // There are enough docs in the posting to have // skip data, and it isn't too close Index: lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (working copy) @@ -209,7 +209,7 @@ /** Optimized implementation. 
*/ public boolean skipTo(int target) throws IOException { - if (df >= skipInterval) { // optimized case + if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case if (skipListReader == null) skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -56,6 +56,7 @@ int skipInterval; int maxSkipLevels; + int skipMinimum; public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException { @@ -102,6 +103,7 @@ SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); + skipMinimum = termsIn.readInt(); } @Override @@ -231,7 +233,7 @@ //System.out.println(" payloadFP=" + termState.payloadFP); } } - if (termState.docFreq >= skipInterval) { + if (termState.docFreq >= skipMinimum) { //System.out.println(" readSkip @ " + termState.bytesReader.pos); if (isFirstTerm) { termState.skipFP = termState.bytesReader.readVLong(); @@ -344,7 +346,7 @@ } docFreq = termState.docFreq; - // NOTE: unused if docFreq < skipInterval: + // NOTE: unused if docFreq < skipMinimum: skipFP = termState.skipFP; count = 0; doc = 0; @@ -420,13 +422,10 @@ @Override public int advance(int target) throws IOException { - // TODO: jump right to next() if target is < X away - // from where we are now? 
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { - if (docFreq >= skipInterval) { - // There are enough docs in the posting to have - // skip data + // skip data, and it's not too close if (skipper == null) { // This DocsEnum has never done any skipping @@ -599,13 +598,10 @@ public int advance(int target) throws IOException { //System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this); - // TODO: jump right to next() if target is < X away - // from where we are now? + if ((target - skipInterval) >= doc && docFreq >= skipMinimum) { - if (docFreq >= skipInterval) { - // There are enough docs in the posting to have - // skip data + // skip data, and it's not too close if (skipper == null) { //System.out.println(" create skipper"); Index: lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (revision 1069496) +++ lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (working copy) @@ -21,6 +21,7 @@ import java.util.Set; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentWriteState; @@ -63,8 +64,23 @@ IndexOutput termsOut; final SepSkipListWriter skipListWriter; - final int skipInterval; - final int maxSkipLevels; + /** Expert: The fraction of TermDocs entries stored in skip tables, + * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in + * smaller indexes, greater acceleration, but fewer accelerable cases, while + * smaller values result in bigger indexes, less acceleration and more + * accelerable cases. More detailed experiments would be useful here. 
*/ + final int skipInterval = 16; + + /** + * Expert: minimum docFreq to write any skip data at all + */ + final int skipMinimum = skipInterval; + + /** Expert: The maximum number of skip levels. Smaller values result in + * slightly smaller indexes, but slower skipping in big posting lists. + */ + final int maxSkipLevels = 10; + final int totalNumDocs; boolean storePayloads; @@ -118,15 +134,11 @@ totalNumDocs = state.numDocs; - // TODO: -- abstraction violation - skipListWriter = new SepSkipListWriter(state.skipInterval, - state.maxSkipLevels, + skipListWriter = new SepSkipListWriter(skipInterval, + maxSkipLevels, state.numDocs, freqOut, docOut, posOut, payloadOut); - - skipInterval = state.skipInterval; - maxSkipLevels = state.maxSkipLevels; } @Override @@ -136,6 +148,7 @@ // TODO: -- just ask skipper to "start" here termsOut.writeInt(skipInterval); // write skipInterval termsOut.writeInt(maxSkipLevels); // write maxSkipLevels + termsOut.writeInt(skipMinimum); // write skipMinimum } @Override @@ -264,7 +277,7 @@ } } - if (df >= skipInterval) { + if (df >= skipMinimum) { //System.out.println(" skipFP=" + skipStart); final long skipFP = skipOut.getFilePointer(); skipListWriter.writeSkip(skipOut);