diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Mon Oct 25 05:39:48 2010 -0400
@@ -168,6 +168,25 @@
     }
 
     @Override
+    public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
+      if (absolute) {
+        fp = indexIn.readVLong();
+        upto = indexIn.next();
+      } else {
+        final long delta = indexIn.readVLong();
+        if (delta == 0) {
+          // same block (fp delta of 0): the stored value is a delta added to the previous upto
+          upto += indexIn.next();
+        } else {
+          // new block: fp advances by delta and the stored upto is taken as-is
+          fp += delta;
+          upto = indexIn.next();
+        }
+      }
+      assert upto < blockSize;
+    }
+
+    @Override
     public void seek(final IntIndexInput.Reader other) throws IOException {
       ((Reader) other).seek(fp, upto);
     }
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Mon Oct 25 05:39:48 2010 -0400
@@ -83,11 +83,30 @@
         // same block
         indexOut.writeVLong(0);
         assert upto >= lastUpto;
-        indexOut.writeVLong(upto - lastUpto);
+        indexOut.writeVInt(upto - lastUpto);
       } else {
         // new block
         indexOut.writeVLong(fp - lastFP);
-        indexOut.writeVLong(upto);
+        indexOut.writeVInt(upto);
+      }
+      lastUpto = upto;
+      lastFP = fp;
+    }
+
+    @Override
+    public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
+      if (absolute) {
+        indexOut.writeVLong(fp);
+        indexOut.write(upto);
+      } else if (fp == lastFP) {
+        // same block: write a zero fp delta, then upto as a delta from lastUpto
+        indexOut.writeVLong(0);
+        assert upto >= lastUpto;
+        indexOut.write(upto - lastUpto);
+      } else {
+        // new block: write fp as a delta from lastFP, then upto in full
+        indexOut.writeVLong(fp - lastFP);
+        indexOut.write(upto);
       }
       lastUpto = upto;
       lastFP = fp;
diff -r 
83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Mon Oct 25 05:39:48 2010 -0400
@@ -189,6 +189,24 @@
     }
 
     @Override
+    public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
+      if (absolute) {
+        fp = indexIn.readVLong();
+        upto = indexIn.next();
+      } else {
+        final long delta = indexIn.readVLong();
+        if (delta == 0) {
+          // same block: upto is stored in full (not as a delta); no 8-bit mask, next() yields the whole int
+          upto = indexIn.next();
+        } else {
+          // new block: fp advances by delta; upto is again stored in full
+          fp += delta;
+          upto = indexIn.next();
+        }
+      }
+    }
+
+    @Override
     public String toString() {
       return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize;
     }
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Mon Oct 25 05:39:48 2010 -0400
@@ -103,6 +103,26 @@
       lastUpto = upto;
       lastFP = fp;
     }
+
+    @Override
+    public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
+      assert upto >= 0;
+      if (absolute) {
+        indexOut.writeVLong(fp);
+        indexOut.write(upto);
+      } else if (fp == lastFP) {
+        // same block: zero fp delta; upto is written in full, not as a delta from lastUpto
+        indexOut.writeVLong(0);
+        assert upto >= lastUpto;
+        indexOut.write(upto);
+      } else {
+        // new block: fp written as a delta from lastFP, upto written in full
+        indexOut.writeVLong(fp - lastFP);
+        indexOut.write(upto);
+      }
+      lastUpto = upto;
+      lastFP = fp;
+    }
   }
 
   @Override
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Mon Oct 25 05:39:16 2010 -0400
+++ 
b/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Mon Oct 25 05:39:48 2010 -0400
@@ -41,6 +41,8 @@
 
     public abstract void read(IndexInput indexIn, boolean absolute) throws IOException;
 
+    public abstract void read(IntIndexInput.Reader indexIn, boolean absolute) throws IOException;
+
     /** Seeks primary stream to the last read offset */
     public abstract void seek(IntIndexInput.Reader stream) throws IOException;
 
@@ -54,6 +56,18 @@
     /** Reads next single int */
     public abstract int next() throws IOException;
 
+    /** Decodes a long from 1 or 2 ints: an even first int holds the whole value,
+     *  an odd one is followed by a second int carrying the bits above bit 29. */
+    public long readVLong() throws IOException {
+      final int v = next();
+      if ((v & 1) == 0) {
+        return v >> 1;
+      } else {
+        final long v2 = next();
+        return (v2 << 30) | (v >> 1);
+      }
+    }
+
     /** Reads next chunk of ints */
     private IntsRef bulkResult;
 
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Mon Oct 25 05:39:48 2010 -0400
@@ -34,9 +34,27 @@
  * @lucene.experimental */
 public abstract class IntIndexOutput implements Closeable {
 
-  /** Write an int to the primary file */
+  /** Write an int to the primary file. The value must be
+   * >= 0. */
   public abstract void write(int v) throws IOException;
 
+  public static final long MAX_SINGLE_INT_VLONG = Integer.MAX_VALUE - (1<<30);
+  public static final long MAX_VLONG = Long.MAX_VALUE - (1L<<62) - (1L<<61);
+
+  /** Encodes as 1 or 2 ints, and can only use 61 of the 64
+   * long bits. 
*/
+  public void writeVLong(long v) throws IOException {
+    assert v >= 0: "v=" + v;
+    assert v <= MAX_VLONG: "v=" + v;
+    // write(int) takes only ints >= 0: bit 0 flags a second int, bits 1-30 carry the low 30 bits of v
+    if (v <= MAX_SINGLE_INT_VLONG) {
+      write(((int) v)<<1);
+    } else {
+      write(((int) ((v & MAX_SINGLE_INT_VLONG))<<1) | 1);
+      write(((int) (v >> 30)));
+    }
+  }
+
   public abstract static class Index {
 
     /** Internally records the current location */
@@ -46,8 +64,10 @@
     public abstract void set(Index other) throws IOException;
 
     /** Writes "location" of current output pointer of primary
-     * output to different output (out) */
+     * output to different output (out) */
     public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
+
+    public abstract void write(IntIndexOutput indexOut, boolean absolute) throws IOException;
   }
 
   /** If you are indexing the primary output file, call
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Mon Oct 25 05:39:48 2010 -0400
@@ -130,21 +130,14 @@
   }
 
   private static class SepTermState extends TermState {
+    // We store only the seek point to the docs file because
+    // the rest of the info (freqIndex, posIndex, etc.) 
is
+    // stored inline in the docs file, right at that seek point:
     IntIndexInput.Index docIndex;
-    IntIndexInput.Index freqIndex;
-    IntIndexInput.Index posIndex;
-    long skipOffset;
-    long payloadOffset;
 
     public Object clone() {
       SepTermState other = (SepTermState) super.clone();
       other.docIndex = (IntIndexInput.Index) docIndex.clone();
-      if (freqIndex != null) {
-        other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
-      }
-      if (posIndex != null) {
-        other.posIndex = (IntIndexInput.Index) posIndex.clone();
-      }
       return other;
     }
 
@@ -152,22 +145,6 @@
       super.copy(_other);
       SepTermState other = (SepTermState) _other;
       docIndex.set(other.docIndex);
-      if (other.posIndex != null) {
-        if (posIndex == null) {
-          posIndex = (IntIndexInput.Index) other.posIndex.clone();
-        } else {
-          posIndex.set(other.posIndex);
-        }
-      }
-      if (other.freqIndex != null) {
-        if (freqIndex == null) {
-          freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
-        } else {
-          freqIndex.set(other.freqIndex);
-        }
-      }
-      skipOffset = other.skipOffset;
-      payloadOffset = other.payloadOffset;
     }
 
     @Override
@@ -184,39 +161,8 @@
   }
 
   @Override
-  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState _termState, boolean isIndexTerm) throws IOException {
-    final SepTermState termState = (SepTermState) _termState;
-
-    // read freq index
-    if (!fieldInfo.omitTermFreqAndPositions) {
-      if (termState.freqIndex == null) {
-        assert isIndexTerm;
-        termState.freqIndex = freqIn.index();
-        termState.posIndex = posIn.index();
-      }
-      termState.freqIndex.read(termsIn, isIndexTerm);
-    }
-
-    // read doc index
-    termState.docIndex.read(termsIn, isIndexTerm);
-
-    // read skip index
-    if (isIndexTerm) {
-      termState.skipOffset = termsIn.readVLong();
-    } else if (termState.docFreq >= skipInterval) {
-      termState.skipOffset += termsIn.readVLong();
-    }
-
-    // read pos, payload index
-    if (!fieldInfo.omitTermFreqAndPositions) {
-      termState.posIndex.read(termsIn, isIndexTerm);
-      final long v = termsIn.readVLong();
-      if (isIndexTerm) {
-        termState.payloadOffset = v; 
-      } else {
-        termState.payloadOffset += v;
-      }
-    }
+  public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, TermState termState, boolean isIndexTerm) throws IOException {
+    ((SepTermState) termState).docIndex.read(termsIn, isIndexTerm);
   }
 
   @Override
@@ -311,14 +257,18 @@
       docIndex.set(termState.docIndex);
       docIndex.seek(docReader);
 
-      skipOffset = termState.skipOffset;
-
       if (!omitTF) {
-        freqIndex.set(termState.freqIndex);
+        freqIndex.read(docReader, true);
         freqIndex.seek(freqReader);
+
+        posIndex.read(docReader, true);
+        // the payload offset is stored next but unused here; read it to step past
+        docReader.readVLong();
       } else {
         freq = 1;
       }
+      skipOffset = docReader.readVLong();
+
       docFreq = termState.docFreq;
       count = 0;
       doc = 0;
@@ -498,17 +448,15 @@
       docIndex.set(termState.docIndex);
       docIndex.seek(docReader);
 
-      freqIndex.set(termState.freqIndex);
+      freqIndex.read(docReader, true);
       freqIndex.seek(freqReader);
 
-      posIndex.set(termState.posIndex);
+      posIndex.read(docReader, true);
       posSeekPending = true;
-      //posIndex.seek(posReader);
       payloadPending = false;
 
-      skipOffset = termState.skipOffset;
-      payloadOffset = termState.payloadOffset;
-      //payloadIn.seek(payloadOffset);
+      payloadOffset = docReader.readVLong();
+      skipOffset = docReader.readVLong();
 
       docFreq = termState.docFreq;
       count = 0;
diff -r 83fee3e80001 lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
--- a/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Mon Oct 25 05:39:16 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Mon Oct 25 05:39:48 2010 -0400
@@ -79,6 +79,7 @@
   long lastPayloadStart;
   int lastDocID;
   int df;
+  private boolean firstDoc;
 
   public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
     super();
@@ -147,6 +148,7 @@
       payloadStart = payloadOut.getFilePointer();
       lastPayloadLength = -1;
     }
+    firstDoc = true;
     skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
   }
 
@@ -169,6 +171,20 @@
   @Override
   public void 
startDoc(int docID, int termDocFreq) throws IOException {
 
+    if (firstDoc) {
+      // TODO: the values written to docOut below are absolute file
+      // pointers, which is wasteful.  It'd be better compression to
+      // write the "baseline" into each indexed term, then
+      // write only the delta here.
+      if (!omitTF) {
+        freqIndex.write(docOut, true);
+        posIndex.write(docOut, true);
+        docOut.writeVLong(payloadStart);
+      }
+      docOut.writeVLong(skipOut.getFilePointer());
+      firstDoc = false;
+    }
+
     final int delta = docID - lastDocID;
 
     if (docID < 0 || (df > 0 && delta <= 0)) {
@@ -229,42 +245,16 @@
 
   @Override
   public void finishTerm(int docCount, boolean isIndexTerm) throws IOException {
-    long skipPos = skipOut.getFilePointer();
-
     // TODO: -- wasteful we are counting this in two places?
     assert docCount > 0;
     assert docCount == df;
 
-    // TODO: -- only do this if once (consolidate the
-    // conditional things that are written)
-    if (!omitTF) {
-      freqIndex.write(termsOut, isIndexTerm);
-    }
     docIndex.write(termsOut, isIndexTerm);
 
     if (df >= skipInterval) {
       skipListWriter.writeSkip(skipOut);
     }
 
-    if (isIndexTerm) {
-      termsOut.writeVLong(skipPos);
-      lastSkipStart = skipPos;
-    } else if (df >= skipInterval) {
-      termsOut.writeVLong(skipPos-lastSkipStart);
-      lastSkipStart = skipPos;
-    }
-
-    if (!omitTF) {
-      posIndex.write(termsOut, isIndexTerm);
-      if (isIndexTerm) {
-        // Write absolute at seek points
-        termsOut.writeVLong(payloadStart);
-      } else {
-        termsOut.writeVLong(payloadStart-lastPayloadStart);
-      }
-      lastPayloadStart = payloadStart;
-    }
-
     lastDocID = 0;
     df = 0;
   }
diff -r 83fee3e80001 lucene/src/test/org/apache/lucene/index/TestIntIndexOutput.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/test/org/apache/lucene/index/TestIntIndexOutput.java Mon Oct 25 05:39:48 2010 -0400
@@ -0,0 +1,102 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; + +public class TestIntIndexOutput extends LuceneTestCase { + + private static class MyIntIndexOutput extends IntIndexOutput { + final IndexOutput out; + + public MyIntIndexOutput(Directory dir) throws IOException { + out = dir.createOutput("it"); + } + + @Override + public void write(int v) throws IOException { + assert v >= 0; + out.writeVInt(v); + } + + @Override + public Index index() { + return null; + } + + @Override + public void close() throws IOException { + out.close(); + } + } + + private static class MyIntIndexInput extends IntIndexInput { + final IndexInput in; + + public MyIntIndexInput(Directory dir) throws IOException { + in = dir.openInput("it"); + } + + @Override + public Reader reader() { + return new Reader() { + @Override + public int next() throws IOException { + return in.readVInt(); + } + }; + } + + @Override + public Index index() { + return null; + } + + @Override + public void close() throws IOException { + in.close(); + } + } + + 
public void testVLong() throws Exception { + Directory dir = newDirectory(); + final int NUM_VALUES = 100000*RANDOM_MULTIPLIER; + final long[] values = new long[NUM_VALUES]; + IntIndexOutput out = new MyIntIndexOutput(dir); + for(int iter=0;iter