Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java (revision 1050374)
+++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexOutput.java (working copy)
@@ -62,7 +62,6 @@
compressor.compress();
final int numBytes = compressor.compressedSize() * 4;
assert numBytes <= 1024;
- out.writeInt(numBytes);
out.writeBytes(output, numBytes);
}
}
Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java (revision 1050374)
+++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PForDeltaIndexInput.java (working copy)
@@ -23,8 +23,6 @@
import org.apache.lucene.util.pfor.PForDecompress;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.IntBuffer;
public class PForDeltaIndexInput extends FixedIntBlockIndexInput {
@@ -33,23 +31,10 @@
}
private static class BlockReader implements FixedIntBlockIndexInput.BlockReader {
- private final IndexInput in;
- private final int[] buffer;
private final PForDecompress decompressor;
- private final byte[] input;
- private final IntBuffer intInput;
public BlockReader(IndexInput in, int[] buffer) {
- this.in = in;
- this.buffer = buffer;
-
- decompressor = new PForDecompress();
- // nocommit -- can't hardwire 1024; it's a function of blockSize
- ByteBuffer byteBuffer = ByteBuffer.allocate(1024);
- input = byteBuffer.array();
- intInput = byteBuffer.asIntBuffer();
- decompressor.setCompressedBuffer(intInput);
- decompressor.setUnCompressedData(buffer, 0, buffer.length);
+ decompressor = new PForDecompress(in, buffer, 0, buffer.length);
}
public void seek(long pos) throws IOException {
@@ -57,11 +42,6 @@
}
public void readBlock() throws IOException {
- int numBytes = in.readInt();
- //System.out.println("nb=" + numBytes);
- // nocommit -- how to avoid this copy?
- in.readBytes(input, 0, numBytes);
- intInput.rewind();
decompressor.decompress();
}
}
Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java (revision 1050374)
+++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FORIndexInput.java (working copy)
@@ -23,8 +23,6 @@
import org.apache.lucene.util.pfor.ForDecompress;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.IntBuffer;
public class FORIndexInput extends FixedIntBlockIndexInput {
@@ -33,23 +31,10 @@
}
private static class BlockReader implements FixedIntBlockIndexInput.BlockReader {
- private final IndexInput in;
- private final int[] buffer;
private final ForDecompress decompressor;
- private final byte[] input;
- private final IntBuffer intInput;
public BlockReader(IndexInput in, int[] buffer) {
- this.in = in;
- this.buffer = buffer;
-
- decompressor = new ForDecompress();
- // nocommit -- can't hardwire 1024; it's a function of blockSize
- ByteBuffer byteBuffer = ByteBuffer.allocate(1024);
- input = byteBuffer.array();
- intInput = byteBuffer.asIntBuffer();
- decompressor.setCompressedBuffer(intInput);
- decompressor.setUnCompressedData(buffer, 0, buffer.length);
+ decompressor = new ForDecompress(in, buffer, 0, buffer.length);
}
public void seek(long pos) throws IOException {
@@ -57,17 +42,6 @@
}
public void readBlock() throws IOException {
- // nocommit -- we don't need this numBytes header --
- // it's a waste. we need something like the zip
- // interface -- the decompressor asks for more bytes
- // if it needs it
- //System.out.println("for: read @ fp=" + in.getFilePointer());
- int numBytes = in.readInt();
- // nocommit -- how to avoid this copy? plus, the copy
- // inside BII. if mmapdir how can we directly access
- // underlying ram w/ no copy?
- in.readBytes(input, 0, numBytes);
- intInput.rewind();
decompressor.decompress();
//System.out.println(" FOR.readBlock");
}
Index: lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java (revision 1050374)
+++ lucene/src/java/org/apache/lucene/util/pfor/ForDecompress.java (working copy)
@@ -16,8 +16,12 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.nio.ByteBuffer;
import java.nio.IntBuffer;
+import org.apache.lucene.store.DataInput;
+
/** Frame of Reference lossless integer compression/decompression.
* For positive integers, the compression is done by leaving out
* the most significant bits, and storing all numbers with a fixed number of bits
@@ -47,132 +51,75 @@
*/
//nocommit: make into static methods without state
-public class ForDecompress {
- /** Number of frame bits. 2**numFrameBits - 1 is the maximum compressed value. */
- protected int numFrameBits;
-
+public final class ForDecompress {
/** IntBuffer for compressed data */
- protected IntBuffer compressedBuffer;
+ final IntBuffer compressedBuffer;
/** Uncompressed data */
- protected int[] unCompressedData;
+ final int[] out;
- /** Offset into unCompressedData */
+ /** Offset into out */
- protected int offset;
+ final int offset;
- /** Size of unCompressedData, -1 when not available. */
+ /** Number of ints in the uncompressed data; fixed at construction. */
- protected int unComprSize = -1;
+ final int len;
- /** Integer buffer to hold the compressed data.
- * Compression and decompression do not affect the current buffer position,
- * and the beginning of the compressed data should be or will be at the current
- * buffer position.
- * When the buffer is not large enough, ArrayIndexOutOfBoundExceptions will occur
- * during compression/decompression.
- * Without a buffer for compressed data, compress() will only determine the number
- * of integers needed in the buffer, see compress().
- * Without a valid buffer, decompress() will throw a NullPointerException.
- * For optimal speed when the IntBuffer is a view on a ByteBuffer,
- * the IntBuffer should have a byte offset of a multiple of 4 bytes, possibly 0.
- * An IntBuffer is used here because 32 bits can efficiently accessed in the buffer
- * on all current processors, and a positive int is normally large enough
- * for various purposes in a Lucene index.
- *
- * @param compressedBuffer The buffer to hold the compressed integers.
- *
- */
- public void setCompressedBuffer(IntBuffer compressedBuffer) {
- this.compressedBuffer = compressedBuffer;
- }
-
-
- /** Array with offset holding uncompressed data.
- * @param unCompressedData The array holding uncompressed integers.
- * @param offset offset in unCompressedData.
- * @param unComprSize The number of uncompressed integers, should be at least 1.
- */
- public void setUnCompressedData(int[] unCompressedData, int offset, int unComprSize) {
- assert unCompressedData != null;
- assert offset >= 0;
- assert unComprSize >= 1;
- assert (offset + unComprSize) <= unCompressedData.length;
- this.unCompressedData = unCompressedData;
+ // nocommit -- can't hardwire 1024; it's a function of blockSize
+ final ByteBuffer byteBuffer;
+ final byte input[];
+ final DataInput in;
+
+ public ForDecompress(DataInput in, int out[], int offset, int len) {
+ this.in = in;
+ this.out = out;
this.offset = offset;
- this.unComprSize = unComprSize;
+ this.len = len;
+ byteBuffer = ByteBuffer.allocate(1024);
+ input = byteBuffer.array();
+ compressedBuffer = byteBuffer.asIntBuffer();
}
- /** Return the number of integers used in IntBuffer.
- * Only valid after compress() or decompress().
- */
- public int compressedSize() {
- return ForConstants.HEADER_SIZE + (unComprSize * numFrameBits + 31) / 32;
- }
-
- protected void decodeHeader() {
- int header = compressedBuffer.get();
- // nocommit -- we know this will always be fixed (eg
- // 128)? silly to encode in every frame?
- //unComprSize = ((header >>> 16) & 255) + 1;
- numFrameBits = ((header >>> 8) & 31) + 1;
- // verify compression method:
- assert ForConstants.FOR_COMPRESSION == ((header >>> 4) & 15);
- }
-
- /** Decompress from the buffer into output from a given offset. */
+ /** Reads one block from {@code in} (a header int, then the bit-packed frame) and decodes it into {@code out}; throws IllegalStateException for an unsupported frame width. */
- public void decompress() {
- decodeHeader();
- decompressFrame();
- }
-
- /** Return the number of integers available for decompression.
- * Do not use before an IntBuffer was passed to setCompressBuffer.
- */
- public int decompressedSize() {
- decodeHeader();
- return unComprSize;
- }
-
- /** For performance, this delegates to classes with fixed numFrameBits. */
- private void decompressFrame() {
+ public void decompress() throws IOException {
+ int header = in.readInt();
+ final int numFrameBits = ((header >>> 8) & 31) + 1;
+ in.readBytes(input, 0, ((len * numFrameBits + 31) / 32) * 4); // whole ints holding len values of numFrameBits bits each (same formula as the former compressedSize(), minus the header); equals numFrameBits*16 for len == 128
+ compressedBuffer.rewind();
switch (numFrameBits) {
// CHECKME: two other implementations might be faster:
// - array of static methods: Method[numFrameBits].invoke(null, [this]),
// - array of non static decompressors: ForDecompressor[numFrameBits].decompressFrame(this) .
- case 1: ForDecompressImpl.decode1(compressedBuffer, unCompressedData); break;
- case 2: ForDecompressImpl.decode2(compressedBuffer, unCompressedData); break;
- case 3: ForDecompressImpl.decode3(compressedBuffer, unCompressedData); break;
- case 4: ForDecompressImpl.decode4(compressedBuffer, unCompressedData); break;
- case 5: ForDecompressImpl.decode5(compressedBuffer, unCompressedData); break;
- case 6: ForDecompressImpl.decode6(compressedBuffer, unCompressedData); break;
- case 7: ForDecompressImpl.decode7(compressedBuffer, unCompressedData); break;
- case 8: ForDecompressImpl.decode8(compressedBuffer, unCompressedData); break;
- case 9: ForDecompressImpl.decode9(compressedBuffer, unCompressedData); break;
- case 10: ForDecompressImpl.decode10(compressedBuffer, unCompressedData); break;
- case 11: ForDecompressImpl.decode11(compressedBuffer, unCompressedData); break;
- case 12: ForDecompressImpl.decode12(compressedBuffer, unCompressedData); break;
- case 13: ForDecompressImpl.decode13(compressedBuffer, unCompressedData); break;
- case 14: ForDecompressImpl.decode14(compressedBuffer, unCompressedData); break;
- case 15: ForDecompressImpl.decode15(compressedBuffer, unCompressedData); break;
- case 16: ForDecompressImpl.decode16(compressedBuffer, unCompressedData); break;
- case 17: ForDecompressImpl.decode17(compressedBuffer, unCompressedData); break;
- case 18: ForDecompressImpl.decode18(compressedBuffer, unCompressedData); break;
- case 19: ForDecompressImpl.decode19(compressedBuffer, unCompressedData); break;
- case 20: ForDecompressImpl.decode20(compressedBuffer, unCompressedData); break;
- case 21: ForDecompressImpl.decode21(compressedBuffer, unCompressedData); break;
- case 22: ForDecompressImpl.decode22(compressedBuffer, unCompressedData); break;
- case 23: ForDecompressImpl.decode23(compressedBuffer, unCompressedData); break;
- case 24: ForDecompressImpl.decode24(compressedBuffer, unCompressedData); break;
- case 25: ForDecompressImpl.decode25(compressedBuffer, unCompressedData); break;
- case 26: ForDecompressImpl.decode26(compressedBuffer, unCompressedData); break;
- case 27: ForDecompressImpl.decode27(compressedBuffer, unCompressedData); break;
- case 28: ForDecompressImpl.decode28(compressedBuffer, unCompressedData); break;
- case 29: ForDecompressImpl.decode29(compressedBuffer, unCompressedData); break;
- case 30: ForDecompressImpl.decode30(compressedBuffer, unCompressedData); break;
- case 31: ForDecompressImpl.decode31(compressedBuffer, unCompressedData); break;
+ case 1: ForDecompressImpl.decode1(compressedBuffer, out); break;
+ case 2: ForDecompressImpl.decode2(compressedBuffer, out); break;
+ case 3: ForDecompressImpl.decode3(compressedBuffer, out); break;
+ case 4: ForDecompressImpl.decode4(compressedBuffer, out); break;
+ case 5: ForDecompressImpl.decode5(compressedBuffer, out); break;
+ case 6: ForDecompressImpl.decode6(compressedBuffer, out); break;
+ case 7: ForDecompressImpl.decode7(compressedBuffer, out); break;
+ case 8: ForDecompressImpl.decode8(compressedBuffer, out); break;
+ case 9: ForDecompressImpl.decode9(compressedBuffer, out); break;
+ case 10: ForDecompressImpl.decode10(compressedBuffer, out); break;
+ case 11: ForDecompressImpl.decode11(compressedBuffer, out); break;
+ case 12: ForDecompressImpl.decode12(compressedBuffer, out); break;
+ case 13: ForDecompressImpl.decode13(compressedBuffer, out); break;
+ case 14: ForDecompressImpl.decode14(compressedBuffer, out); break;
+ case 15: ForDecompressImpl.decode15(compressedBuffer, out); break;
+ case 16: ForDecompressImpl.decode16(compressedBuffer, out); break;
+ case 17: ForDecompressImpl.decode17(compressedBuffer, out); break;
+ case 18: ForDecompressImpl.decode18(compressedBuffer, out); break;
+ case 19: ForDecompressImpl.decode19(compressedBuffer, out); break;
+ case 20: ForDecompressImpl.decode20(compressedBuffer, out); break;
+ case 21: ForDecompressImpl.decode21(compressedBuffer, out); break;
+ case 22: ForDecompressImpl.decode22(compressedBuffer, out); break;
+ case 23: ForDecompressImpl.decode23(compressedBuffer, out); break;
+ case 24: ForDecompressImpl.decode24(compressedBuffer, out); break;
+ case 25: ForDecompressImpl.decode25(compressedBuffer, out); break;
+ case 26: ForDecompressImpl.decode26(compressedBuffer, out); break;
+ case 27: ForDecompressImpl.decode27(compressedBuffer, out); break;
+ case 28: ForDecompressImpl.decode28(compressedBuffer, out); break;
+ case 29: ForDecompressImpl.decode29(compressedBuffer, out); break;
+ case 30: ForDecompressImpl.decode30(compressedBuffer, out); break;
+ case 31: ForDecompressImpl.decode31(compressedBuffer, out); break;
default:
throw new IllegalStateException("Unknown number of frame bits " + numFrameBits);
}
}
-
- public int getNumFrameBits() {
- return numFrameBits;
- }
}
Index: lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java (revision 1050374)
+++ lucene/src/java/org/apache/lucene/util/pfor/PForDecompress.java (working copy)
@@ -1,4 +1,10 @@
package org.apache.lucene.util.pfor;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
+
+import org.apache.lucene.store.DataInput;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -17,7 +23,6 @@
*/
// nocommit need low level unit tests for this
-// nocommit break out decompress seperately?
/** Patched Frame of Reference PFOR compression/decompression.
*
@@ -55,49 +60,38 @@
*
*/
//nocommit: make into static methods without state
-public class PForDecompress extends ForDecompress {
+public final class PForDecompress {
+ /** IntBuffer for compressed data */
+ final IntBuffer compressedBuffer;
+
+ /** Uncompressed data */
+ final int[] out;
+ /** Offset into out */
+ final int offset;
+ /** Number of uncompressed ints; an exception chain index at or beyond it ends patching. */
+ final int len;
+
+ // nocommit -- can't hardwire 1024; it's a function of blockSize
+ final ByteBuffer byteBuffer;
+ final byte input[];
+ final DataInput in;
+
+ public PForDecompress(DataInput in, int out[], int offset, int len) {
+ this.in = in;
+ this.out = out;
+ this.offset = offset;
+ this.len = len;
+ byteBuffer = ByteBuffer.allocate(1024);
+ input = byteBuffer.array();
+ compressedBuffer = byteBuffer.asIntBuffer();
+ }
+
/** Index on input and in compressed frame of first exception, -1 when no exceptions */
private int firstExceptionIndex;
/** How to encode PFor exceptions: 0: byte, 1: short, 2:int, unused: 3: long */
private int exceptionCode = -1;
-
- /** Total number of exception values */
- private int numExceptions;
-
- /** Return the number bytes used for a single exception */
- private int exceptionByteSize() {
- assert exceptionCode >= 0;
- assert exceptionCode <= 2;
- return exceptionCode == 0 ? 1
- : exceptionCode == 1 ? 2
- : 4;
- }
-
- /** Return the number of exceptions.
- * Only valid after compress() or decompress().
- */
- public int getNumExceptions() {
- return numExceptions;
- }
- private int compressedArrayByteSize() {
- assert unComprSize % 32 == 0;
- return (unComprSize>>3)*numFrameBits;
- }
-
- /** Return the number of integers used in IntBuffer.
- * Only valid after compress() or decompress().
- */
- @Override
- public int compressedSize() {
- // numExceptions only valid after compress() or decompress()
- return ForConstants.HEADER_SIZE
- + ((compressedArrayByteSize()
- + exceptionByteSize() * numExceptions
- + 3) >> 2); // round up to next multiple of 4 and divide by 4
- }
-
/** Decode the exception values while going through the exception chain.
*
For performance, delegate/subclass this to classes with fixed exceptionCode.
*
Also, decoding exceptions is preferably done from an int border instead of
@@ -107,7 +101,6 @@
* zero bytes so specialize for these cases.
*/
private void patchExceptions() {
- numExceptions = 0;
if (firstExceptionIndex == -1) {
return;
}
@@ -121,7 +114,7 @@
while(true) {
final int excValue = (curIntValue >>> firstBitPosition) & ((1 << 8) - 1);
excIndex = patch(excIndex, excValue);
- if (excIndex >= unComprSize) {
+ if (excIndex >= len) {
break;
}
firstBitPosition += 8;
@@ -134,11 +127,11 @@
break;
case 1: { // 2 byte exceptions
- while (excIndex < unComprSize) {
+ while (excIndex < len) {
final int curIntValue = compressedBuffer.get();
int excValue = curIntValue & ((1<<16)-1);
excIndex = patch(excIndex, excValue);
- if (excIndex >= unComprSize) {
+ if (excIndex >= len) {
break;
}
excValue = curIntValue >>> 16;
@@ -150,38 +143,68 @@
case 2: // 4 byte exceptions
do {
excIndex = patch(excIndex, compressedBuffer.get());
- } while (excIndex < unComprSize);
+ } while (excIndex < len);
break;
}
}
- @Override
- protected void decodeHeader() {
+ /** Reads one block from in (an int byte count, then that many bytes), decodes the frame into out and patches the exceptions. */
+ public void decompress() throws IOException {
+ int numBytes = in.readInt(); // nocommit: is it possible to encode # of exception bytes in header?
+ in.readBytes(input, 0, numBytes);
+ compressedBuffer.rewind();
int header = compressedBuffer.get();
+ final int numFrameBits = ((header >>> 8) & 31) + 1;
+
+ switch (numFrameBits) {
+ // CHECKME: two other implementations might be faster:
+ // - array of static methods: Method[numFrameBits].invoke(null, [this]),
+ // - array of non static decompressors: ForDecompressor[numFrameBits].decompressFrame(this) .
+ case 1: ForDecompressImpl.decode1(compressedBuffer, out); break;
+ case 2: ForDecompressImpl.decode2(compressedBuffer, out); break;
+ case 3: ForDecompressImpl.decode3(compressedBuffer, out); break;
+ case 4: ForDecompressImpl.decode4(compressedBuffer, out); break;
+ case 5: ForDecompressImpl.decode5(compressedBuffer, out); break;
+ case 6: ForDecompressImpl.decode6(compressedBuffer, out); break;
+ case 7: ForDecompressImpl.decode7(compressedBuffer, out); break;
+ case 8: ForDecompressImpl.decode8(compressedBuffer, out); break;
+ case 9: ForDecompressImpl.decode9(compressedBuffer, out); break;
+ case 10: ForDecompressImpl.decode10(compressedBuffer, out); break;
+ case 11: ForDecompressImpl.decode11(compressedBuffer, out); break;
+ case 12: ForDecompressImpl.decode12(compressedBuffer, out); break;
+ case 13: ForDecompressImpl.decode13(compressedBuffer, out); break;
+ case 14: ForDecompressImpl.decode14(compressedBuffer, out); break;
+ case 15: ForDecompressImpl.decode15(compressedBuffer, out); break;
+ case 16: ForDecompressImpl.decode16(compressedBuffer, out); break;
+ case 17: ForDecompressImpl.decode17(compressedBuffer, out); break;
+ case 18: ForDecompressImpl.decode18(compressedBuffer, out); break;
+ case 19: ForDecompressImpl.decode19(compressedBuffer, out); break;
+ case 20: ForDecompressImpl.decode20(compressedBuffer, out); break;
+ case 21: ForDecompressImpl.decode21(compressedBuffer, out); break;
+ case 22: ForDecompressImpl.decode22(compressedBuffer, out); break;
+ case 23: ForDecompressImpl.decode23(compressedBuffer, out); break;
+ case 24: ForDecompressImpl.decode24(compressedBuffer, out); break;
+ case 25: ForDecompressImpl.decode25(compressedBuffer, out); break;
+ case 26: ForDecompressImpl.decode26(compressedBuffer, out); break;
+ case 27: ForDecompressImpl.decode27(compressedBuffer, out); break;
+ case 28: ForDecompressImpl.decode28(compressedBuffer, out); break;
+ case 29: ForDecompressImpl.decode29(compressedBuffer, out); break;
+ case 30: ForDecompressImpl.decode30(compressedBuffer, out); break;
+ case 31: ForDecompressImpl.decode31(compressedBuffer, out); break;
+ default:
+ throw new IllegalStateException("Unknown number of frame bits " + numFrameBits);
+ }
firstExceptionIndex = ((header >>> 24) & 255) - 1;
- //unComprSize = ((header >>> 16) & 255) + 1;
- numFrameBits = ((header >>> 8) & 31) + 1;
- assert numFrameBits > 0: numFrameBits;
- assert numFrameBits <= 32: numFrameBits;
- // verify compression method:
- assert ForConstants.PFOR_COMPRESSION == ((header >>> 4) & 15);
exceptionCode = (header >>> 13) & 3;
assert exceptionCode <= 2;
- }
-
- /** Decompress from the buffer into output from a given offset. */
- @Override
- public void decompress() {
- super.decompress();
patchExceptions();
}
/** Patch and return index of next exception */
private int patch(int excIndex, int excValue) {
- int nextExceptionIndex = unCompressedData[excIndex] + excIndex + 1; // chain offset
- unCompressedData[excIndex + offset] = excValue; // patch
+ int nextExceptionIndex = out[excIndex] + excIndex + 1; // decoded slot holds the chain offset (distance - 1) to the next exception; NOTE(review): this read ignores offset while the write below applies it -- equivalent only for offset == 0, confirm
+ out[excIndex + offset] = excValue; // overwrite the chain link with the real exception value
assert nextExceptionIndex > excIndex;
- numExceptions++;
return nextExceptionIndex;
}
}