Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java (revision 1050374) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java (working copy) @@ -62,7 +62,6 @@ compressor.compress(); final int numBytes = compressor.compressedSize() * 4; assert numBytes <= 1024; - out.writeInt(numBytes); out.writeBytes(output, numBytes); } } Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java (revision 1050374) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java (working copy) @@ -23,8 +23,6 @@ import org.apache.lucene.util.pfor.PForDecompress; import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.IntBuffer; public class PForDeltaIndexInput extends FixedIntBlockIndexInput { @@ -33,23 +31,10 @@ } private static class BlockReader implements FixedIntBlockIndexInput.BlockReader { - private final IndexInput in; - private final int[] buffer; private final PForDecompress decompressor; - private final byte[] input; - private final IntBuffer intInput; public BlockReader(IndexInput in, int[] buffer) { - this.in = in; - this.buffer = buffer; - - decompressor = new PForDecompress(); - // nocommit -- can't hardwire 1024; it's a function of blockSize - ByteBuffer byteBuffer = ByteBuffer.allocate(1024); - input = byteBuffer.array(); - intInput = byteBuffer.asIntBuffer(); - decompressor.setCompressedBuffer(intInput); - decompressor.setUnCompressedData(buffer, 0, buffer.length); + decompressor = new PForDecompress(in, buffer, 0, buffer.length); } public void seek(long pos) throws IOException { @@ -57,11 +42,6 @@ } public void readBlock() throws IOException 
{ - int numBytes = in.readInt(); - //System.out.println("nb=" + numBytes); - // nocommit -- how to avoid this copy? - in.readBytes(input, 0, numBytes); - intInput.rewind(); decompressor.decompress(); } } Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java (revision 1050374) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java (working copy) @@ -23,8 +23,6 @@ import org.apache.lucene.util.pfor.ForDecompress; import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.IntBuffer; public class FORIndexInput extends FixedIntBlockIndexInput { @@ -33,23 +31,10 @@ } private static class BlockReader implements FixedIntBlockIndexInput.BlockReader { - private final IndexInput in; - private final int[] buffer; private final ForDecompress decompressor; - private final byte[] input; - private final IntBuffer intInput; public BlockReader(IndexInput in, int[] buffer) { - this.in = in; - this.buffer = buffer; - - decompressor = new ForDecompress(); - // nocommit -- can't hardwire 1024; it's a function of blockSize - ByteBuffer byteBuffer = ByteBuffer.allocate(1024); - input = byteBuffer.array(); - intInput = byteBuffer.asIntBuffer(); - decompressor.setCompressedBuffer(intInput); - decompressor.setUnCompressedData(buffer, 0, buffer.length); + decompressor = new ForDecompress(in, buffer, 0, buffer.length); } public void seek(long pos) throws IOException { @@ -57,17 +42,6 @@ } public void readBlock() throws IOException { - // nocommit -- we don't need this numBytes header -- - // it's a waste. we need something like the zip - // interface -- the decompressor asks for more bytes - // if it needs it - //System.out.println("for: read @ fp=" + in.getFilePointer()); - int numBytes = in.readInt(); - // nocommit -- how to avoid this copy? plus, the copy - // inside BII. 
if mmapdir how can we directly access - // underlying ram w/ no copy? - in.readBytes(input, 0, numBytes); - intInput.rewind(); decompressor.decompress(); //System.out.println(" FOR.readBlock"); } Index: lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java =================================================================== --- lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java (revision 1050374) +++ lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java (working copy) @@ -16,8 +16,12 @@ * limitations under the License. */ +import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.IntBuffer; +import org.apache.lucene.store.DataInput; + /** Frame of Reference lossless integer compression/decompression. * For positive integers, the compression is done by leaving out * the most significant bits, and storing all numbers with a fixed number of bits @@ -47,132 +51,75 @@ */ //nocommit: make into static methods without state -public class ForDecompress { - /** Number of frame bits. 2**numFrameBits - 1 is the maximum compressed value. */ - protected int numFrameBits; - +public final class ForDecompress { /** IntBuffer for compressed data */ - protected IntBuffer compressedBuffer; + final IntBuffer compressedBuffer; /** Uncompressed data */ - protected int[] unCompressedData; + final int[] out; /** Offset into unCompressedData */ - protected int offset; + final int offset; /** Size of unCompressedData, -1 when not available. */ - protected int unComprSize = -1; + final int len; - /** Integer buffer to hold the compressed data.
- * Compression and decompression do not affect the current buffer position, - * and the beginning of the compressed data should be or will be at the current - * buffer position.
- * When the buffer is not large enough, ArrayIndexOutOfBoundExceptions will occur - * during compression/decompression.
- * Without a buffer for compressed data, compress() will only determine the number - * of integers needed in the buffer, see compress().
- * Without a valid buffer, decompress() will throw a NullPointerException.
- * For optimal speed when the IntBuffer is a view on a ByteBuffer, - * the IntBuffer should have a byte offset of a multiple of 4 bytes, possibly 0.
- * An IntBuffer is used here because 32 bits can efficiently accessed in the buffer - * on all current processors, and a positive int is normally large enough - * for various purposes in a Lucene index. - * - * @param compressedBuffer The buffer to hold the compressed integers. - * - */ - public void setCompressedBuffer(IntBuffer compressedBuffer) { - this.compressedBuffer = compressedBuffer; - } - - - /** Array with offset holding uncompressed data. - * @param unCompressedData The array holding uncompressed integers. - * @param offset offset in unCompressedData. - * @param unComprSize The number of uncompressed integers, should be at least 1. - */ - public void setUnCompressedData(int[] unCompressedData, int offset, int unComprSize) { - assert unCompressedData != null; - assert offset >= 0; - assert unComprSize >= 1; - assert (offset + unComprSize) <= unCompressedData.length; - this.unCompressedData = unCompressedData; + // nocommit -- can't hardwire 1024; it's a function of blockSize + final ByteBuffer byteBuffer; + final byte input[]; + final DataInput in; + + public ForDecompress(DataInput in, int out[], int offset, int len) { + this.in = in; + this.out = out; this.offset = offset; - this.unComprSize = unComprSize; + this.len = len; + byteBuffer = ByteBuffer.allocate(1024); + input = byteBuffer.array(); + compressedBuffer = byteBuffer.asIntBuffer(); } - /** Return the number of integers used in IntBuffer. - * Only valid after compress() or decompress(). - */ - public int compressedSize() { - return ForConstants.HEADER_SIZE + (unComprSize * numFrameBits + 31) / 32; - } - - protected void decodeHeader() { - int header = compressedBuffer.get(); - // nocommit -- we know this will always be fixed (eg - // 128)? silly to encode in every frame? 
- //unComprSize = ((header >>> 16) & 255) + 1; - numFrameBits = ((header >>> 8) & 31) + 1; - // verify compression method: - assert ForConstants.FOR_COMPRESSION == ((header >>> 4) & 15); - } - /** Decompress from the buffer into output from a given offset. */ - public void decompress() { - decodeHeader(); - decompressFrame(); - } - - /** Return the number of integers available for decompression. - * Do not use before an IntBuffer was passed to setCompressBuffer. - */ - public int decompressedSize() { - decodeHeader(); - return unComprSize; - } - - /** For performance, this delegates to classes with fixed numFrameBits. */ - private void decompressFrame() { + public void decompress() throws IOException { + int header = in.readInt(); + final int numFrameBits = ((header >>> 8) & 31) + 1; + in.readBytes(input, 0, numFrameBits*16); + compressedBuffer.rewind(); switch (numFrameBits) { // CHECKME: two other implementations might be faster: // - array of static methods: Method[numFrameBits].invoke(null, [this]), // - array of non static decompressors: ForDecompressor[numFrameBits].decompressFrame(this) . 
- case 1: ForDecompressImpl.decode1(compressedBuffer, unCompressedData); break; - case 2: ForDecompressImpl.decode2(compressedBuffer, unCompressedData); break; - case 3: ForDecompressImpl.decode3(compressedBuffer, unCompressedData); break; - case 4: ForDecompressImpl.decode4(compressedBuffer, unCompressedData); break; - case 5: ForDecompressImpl.decode5(compressedBuffer, unCompressedData); break; - case 6: ForDecompressImpl.decode6(compressedBuffer, unCompressedData); break; - case 7: ForDecompressImpl.decode7(compressedBuffer, unCompressedData); break; - case 8: ForDecompressImpl.decode8(compressedBuffer, unCompressedData); break; - case 9: ForDecompressImpl.decode9(compressedBuffer, unCompressedData); break; - case 10: ForDecompressImpl.decode10(compressedBuffer, unCompressedData); break; - case 11: ForDecompressImpl.decode11(compressedBuffer, unCompressedData); break; - case 12: ForDecompressImpl.decode12(compressedBuffer, unCompressedData); break; - case 13: ForDecompressImpl.decode13(compressedBuffer, unCompressedData); break; - case 14: ForDecompressImpl.decode14(compressedBuffer, unCompressedData); break; - case 15: ForDecompressImpl.decode15(compressedBuffer, unCompressedData); break; - case 16: ForDecompressImpl.decode16(compressedBuffer, unCompressedData); break; - case 17: ForDecompressImpl.decode17(compressedBuffer, unCompressedData); break; - case 18: ForDecompressImpl.decode18(compressedBuffer, unCompressedData); break; - case 19: ForDecompressImpl.decode19(compressedBuffer, unCompressedData); break; - case 20: ForDecompressImpl.decode20(compressedBuffer, unCompressedData); break; - case 21: ForDecompressImpl.decode21(compressedBuffer, unCompressedData); break; - case 22: ForDecompressImpl.decode22(compressedBuffer, unCompressedData); break; - case 23: ForDecompressImpl.decode23(compressedBuffer, unCompressedData); break; - case 24: ForDecompressImpl.decode24(compressedBuffer, unCompressedData); break; - case 25: 
ForDecompressImpl.decode25(compressedBuffer, unCompressedData); break; - case 26: ForDecompressImpl.decode26(compressedBuffer, unCompressedData); break; - case 27: ForDecompressImpl.decode27(compressedBuffer, unCompressedData); break; - case 28: ForDecompressImpl.decode28(compressedBuffer, unCompressedData); break; - case 29: ForDecompressImpl.decode29(compressedBuffer, unCompressedData); break; - case 30: ForDecompressImpl.decode30(compressedBuffer, unCompressedData); break; - case 31: ForDecompressImpl.decode31(compressedBuffer, unCompressedData); break; + case 1: ForDecompressImpl.decode1(compressedBuffer, out); break; + case 2: ForDecompressImpl.decode2(compressedBuffer, out); break; + case 3: ForDecompressImpl.decode3(compressedBuffer, out); break; + case 4: ForDecompressImpl.decode4(compressedBuffer, out); break; + case 5: ForDecompressImpl.decode5(compressedBuffer, out); break; + case 6: ForDecompressImpl.decode6(compressedBuffer, out); break; + case 7: ForDecompressImpl.decode7(compressedBuffer, out); break; + case 8: ForDecompressImpl.decode8(compressedBuffer, out); break; + case 9: ForDecompressImpl.decode9(compressedBuffer, out); break; + case 10: ForDecompressImpl.decode10(compressedBuffer, out); break; + case 11: ForDecompressImpl.decode11(compressedBuffer, out); break; + case 12: ForDecompressImpl.decode12(compressedBuffer, out); break; + case 13: ForDecompressImpl.decode13(compressedBuffer, out); break; + case 14: ForDecompressImpl.decode14(compressedBuffer, out); break; + case 15: ForDecompressImpl.decode15(compressedBuffer, out); break; + case 16: ForDecompressImpl.decode16(compressedBuffer, out); break; + case 17: ForDecompressImpl.decode17(compressedBuffer, out); break; + case 18: ForDecompressImpl.decode18(compressedBuffer, out); break; + case 19: ForDecompressImpl.decode19(compressedBuffer, out); break; + case 20: ForDecompressImpl.decode20(compressedBuffer, out); break; + case 21: ForDecompressImpl.decode21(compressedBuffer, out); break; + 
case 22: ForDecompressImpl.decode22(compressedBuffer, out); break; + case 23: ForDecompressImpl.decode23(compressedBuffer, out); break; + case 24: ForDecompressImpl.decode24(compressedBuffer, out); break; + case 25: ForDecompressImpl.decode25(compressedBuffer, out); break; + case 26: ForDecompressImpl.decode26(compressedBuffer, out); break; + case 27: ForDecompressImpl.decode27(compressedBuffer, out); break; + case 28: ForDecompressImpl.decode28(compressedBuffer, out); break; + case 29: ForDecompressImpl.decode29(compressedBuffer, out); break; + case 30: ForDecompressImpl.decode30(compressedBuffer, out); break; + case 31: ForDecompressImpl.decode31(compressedBuffer, out); break; default: throw new IllegalStateException("Unknown number of frame bits " + numFrameBits); } } - - public int getNumFrameBits() { - return numFrameBits; - } } Index: lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java =================================================================== --- lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java (revision 1050374) +++ lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java (working copy) @@ -1,4 +1,10 @@ package org.apache.lucene.util.pfor; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; + +import org.apache.lucene.store.DataInput; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -17,7 +23,6 @@ */ // nocommit need low level unit tests for this -// nocommit break out decompress seperately? /** Patched Frame of Reference PFOR compression/decompression. *

@@ -55,49 +60,38 @@ * */ //nocommit: make into static methods without state -public class PForDecompress extends ForDecompress { +public final class PForDecompress { + /** IntBuffer for compressed data */ + final IntBuffer compressedBuffer; + + /** Uncompressed data */ + final int[] out; + /** Offset into out */ + final int offset; + /** Size of the uncompressed data in out */ + final int len; + + // nocommit -- can't hardwire 1024; it's a function of blockSize + final ByteBuffer byteBuffer; + final byte input[]; + final DataInput in; + + public PForDecompress(DataInput in, int out[], int offset, int len) { + this.in = in; + this.out = out; + this.offset = offset; + this.len = len; + byteBuffer = ByteBuffer.allocate(1024); + input = byteBuffer.array(); + compressedBuffer = byteBuffer.asIntBuffer(); + } + /** Index on input and in compressed frame of first exception, -1 when no exceptions */ private int firstExceptionIndex; /** How to encode PFor exceptions: 0: byte, 1: short, 2:int, unused: 3: long */ private int exceptionCode = -1; - - /** Total number of exception values */ - private int numExceptions; - - /** Return the number bytes used for a single exception */ - private int exceptionByteSize() { - assert exceptionCode >= 0; - assert exceptionCode <= 2; - return exceptionCode == 0 ? 1 - : exceptionCode == 1 ? 2 - : 4; - } - - /** Return the number of exceptions. - * Only valid after compress() or decompress(). - */ - public int getNumExceptions() { - return numExceptions; - } - private int compressedArrayByteSize() { - assert unComprSize % 32 == 0; - return (unComprSize>>3)*numFrameBits; - } - - /** Return the number of integers used in IntBuffer. - * Only valid after compress() or decompress(). 
- */ - @Override - public int compressedSize() { - // numExceptions only valid after compress() or decompress() - return ForConstants.HEADER_SIZE - + ((compressedArrayByteSize() - + exceptionByteSize() * numExceptions - + 3) >> 2); // round up to next multiple of 4 and divide by 4 - } - /** Decode the exception values while going through the exception chain. *
For performance, delegate/subclass this to classes with fixed exceptionCode. *
Also, decoding exceptions is preferably done from an int border instead of @@ -107,7 +101,6 @@ * zero bytes so specialize for these cases. */ private void patchExceptions() { - numExceptions = 0; if (firstExceptionIndex == -1) { return; } @@ -121,7 +114,7 @@ while(true) { final int excValue = (curIntValue >>> firstBitPosition) & ((1 << 8) - 1); excIndex = patch(excIndex, excValue); - if (excIndex >= unComprSize) { + if (excIndex >= len) { break; } firstBitPosition += 8; @@ -134,11 +127,11 @@ break; case 1: { // 2 byte exceptions - while (excIndex < unComprSize) { + while (excIndex < len) { final int curIntValue = compressedBuffer.get(); int excValue = curIntValue & ((1<<16)-1); excIndex = patch(excIndex, excValue); - if (excIndex >= unComprSize) { + if (excIndex >= len) { break; } excValue = curIntValue >>> 16; @@ -150,38 +143,68 @@ case 2: // 4 byte exceptions do { excIndex = patch(excIndex, compressedBuffer.get()); - } while (excIndex < unComprSize); + } while (excIndex < len); break; } } - @Override - protected void decodeHeader() { + /** Decompress from the buffer into output from a given offset. */ + public void decompress() throws IOException { + int numBytes = in.readInt(); // nocommit: is it possible to encode # of exception bytes in header? + in.readBytes(input, 0, numBytes); + compressedBuffer.rewind(); int header = compressedBuffer.get(); + final int numFrameBits = ((header >>> 8) & 31) + 1; + + switch (numFrameBits) { + // CHECKME: two other implementations might be faster: + // - array of static methods: Method[numFrameBits].invoke(null, [this]), + // - array of non static decompressors: ForDecompressor[numFrameBits].decompressFrame(this) . 
+ case 1: ForDecompressImpl.decode1(compressedBuffer, out); break; + case 2: ForDecompressImpl.decode2(compressedBuffer, out); break; + case 3: ForDecompressImpl.decode3(compressedBuffer, out); break; + case 4: ForDecompressImpl.decode4(compressedBuffer, out); break; + case 5: ForDecompressImpl.decode5(compressedBuffer, out); break; + case 6: ForDecompressImpl.decode6(compressedBuffer, out); break; + case 7: ForDecompressImpl.decode7(compressedBuffer, out); break; + case 8: ForDecompressImpl.decode8(compressedBuffer, out); break; + case 9: ForDecompressImpl.decode9(compressedBuffer, out); break; + case 10: ForDecompressImpl.decode10(compressedBuffer, out); break; + case 11: ForDecompressImpl.decode11(compressedBuffer, out); break; + case 12: ForDecompressImpl.decode12(compressedBuffer, out); break; + case 13: ForDecompressImpl.decode13(compressedBuffer, out); break; + case 14: ForDecompressImpl.decode14(compressedBuffer, out); break; + case 15: ForDecompressImpl.decode15(compressedBuffer, out); break; + case 16: ForDecompressImpl.decode16(compressedBuffer, out); break; + case 17: ForDecompressImpl.decode17(compressedBuffer, out); break; + case 18: ForDecompressImpl.decode18(compressedBuffer, out); break; + case 19: ForDecompressImpl.decode19(compressedBuffer, out); break; + case 20: ForDecompressImpl.decode20(compressedBuffer, out); break; + case 21: ForDecompressImpl.decode21(compressedBuffer, out); break; + case 22: ForDecompressImpl.decode22(compressedBuffer, out); break; + case 23: ForDecompressImpl.decode23(compressedBuffer, out); break; + case 24: ForDecompressImpl.decode24(compressedBuffer, out); break; + case 25: ForDecompressImpl.decode25(compressedBuffer, out); break; + case 26: ForDecompressImpl.decode26(compressedBuffer, out); break; + case 27: ForDecompressImpl.decode27(compressedBuffer, out); break; + case 28: ForDecompressImpl.decode28(compressedBuffer, out); break; + case 29: ForDecompressImpl.decode29(compressedBuffer, out); break; + case 30: 
ForDecompressImpl.decode30(compressedBuffer, out); break; + case 31: ForDecompressImpl.decode31(compressedBuffer, out); break; + default: + throw new IllegalStateException("Unknown number of frame bits " + numFrameBits); + } firstExceptionIndex = ((header >>> 24) & 255) - 1; - //unComprSize = ((header >>> 16) & 255) + 1; - numFrameBits = ((header >>> 8) & 31) + 1; - assert numFrameBits > 0: numFrameBits; - assert numFrameBits <= 32: numFrameBits; - // verify compression method: - assert ForConstants.PFOR_COMPRESSION == ((header >>> 4) & 15); exceptionCode = (header >>> 13) & 3; assert exceptionCode <= 2; - } - - /** Decompress from the buffer into output from a given offset. */ - @Override - public void decompress() { - super.decompress(); patchExceptions(); } /** Patch and return index of next exception */ private int patch(int excIndex, int excValue) { - int nextExceptionIndex = unCompressedData[excIndex] + excIndex + 1; // chain offset - unCompressedData[excIndex + offset] = excValue; // patch + int nextExceptionIndex = out[excIndex] + excIndex + 1; // chain offset + out[excIndex + offset] = excValue; // patch assert nextExceptionIndex > excIndex; - numExceptions++; return nextExceptionIndex; } }