Index: lucene/src/test/org/apache/lucene/index/codecs/intblock/TestPForDeltaFixedIntBlockCodec.java =================================================================== --- lucene/src/test/org/apache/lucene/index/codecs/intblock/TestPForDeltaFixedIntBlockCodec.java (revision 0) +++ lucene/src/test/org/apache/lucene/index/codecs/intblock/TestPForDeltaFixedIntBlockCodec.java (revision 0) @@ -0,0 +1,87 @@ +package org.apache.lucene.index.codecs.intblock; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.BulkPostingsEnum; +import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec; +import org.apache.lucene.index.codecs.sep.*; +import org.apache.lucene.store.*; +import org.apache.lucene.util.LuceneTestCase; + +/** + * This class is to test the PForDeltaFixedIntBlockCodec + * + * + */ + +public class TestPForDeltaFixedIntBlockCodec extends LuceneTestCase { + + public void testPForDeltaSimpleIntBlocks() throws Exception { + Directory dir = newDirectory(); + int blockSize = 128; + IntStreamFactory f = new PForDeltaFixedIntBlockCodec(blockSize).getIntFactory(); + int testDataSize = 212402; + int[] testData = new int[testDataSize]; + for(int i=0; i 0); + + for(int i=0;i 0); + pointer = 0; + } + } + in.close(); + + dir.close(); + } + + public void testPForDeltaEmptySimpleIntBlocks() throws Exception { + Directory dir = newDirectory(); + + IntStreamFactory f = new PForDeltaFixedIntBlockCodec(128).getIntFactory(); + IntIndexOutput out = f.createOutput(dir, "test"); + + // write no ints + out.close(); + + IntIndexInput in = f.openInput(dir, "test"); + in.reader(); + // read no ints + in.close(); + dir.close(); + } +} + Property changes on: lucene/src/test/org/apache/lucene/index/codecs/intblock/TestPForDeltaFixedIntBlockCodec.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (revision 1050843) +++ lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (working copy) @@ -125,21 +125,31 @@ } } + private boolean abort; + @Override public void write(int v) throws IOException { - upto -= add(v)-1; - assert upto >= 0; + boolean success = false; + try { + upto -= add(v)-1; + assert upto 
>= 0; + success = true; + } finally { + abort |= !success; + } } @Override public void close() throws IOException { try { // stuff 0s in until the "real" data is flushed: - int stuffed = 0; - while(upto > stuffed) { - upto -= add(0)-1; - assert upto >= 0; - stuffed += 1; + if (!abort) { + int stuffed = 0; + while(upto > stuffed) { + upto -= add(0)-1; + assert upto >= 0; + stuffed += 1; + } } } finally { out.close(); Index: lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (revision 1050843) +++ lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (working copy) @@ -118,11 +118,19 @@ } } + private boolean abort; + @Override public void write(int v) throws IOException { buffer[upto++] = v; if (upto == blockSize) { - flushBlock(); + boolean success = false; + try { + flushBlock(); + success = true; + } finally { + abort |= !success; + } upto = 0; } } @@ -130,11 +138,29 @@ @Override public void close() throws IOException { try { + // NOTE: entries in the block after current upto are + // invalid + if (!abort) { + while(upto != 0) { + // nocommit -- risky since in theory a "smart" int + // encoder could do run-length-encoding and thus + // never flush on an infinite stream of 0s; maybe + // flush upto instead? 
or random ints heh + // stuff 0s until final block is flushed + //System.out.println("upto=" + upto + " stuff 0; blockSize=" + blockSize); + write(0); + } + } + /* if (upto > 0) { - // NOTE: entries in the block after current upto are - // invalid - flushBlock(); + while(upto < blockSize) { + write(0); + upto++; + System.out.println("FILL"); + } + //flushBlock(); } + */ } finally { out.close(); } Index: lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (revision 0) @@ -0,0 +1,256 @@ +package org.apache.lucene.index.codecs.pfordelta2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.io.IOException; +import java.util.Arrays; +import java.util.Set; + +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.sep.IntStreamFactory; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; +import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; +import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.PostingsReaderBase; +import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.TermsIndexReaderBase; +import org.apache.lucene.index.codecs.TermsIndexWriterBase; +import org.apache.lucene.index.codecs.standard.StandardCodec; +import org.apache.lucene.store.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.pfor2.PForDelta; + +/** + * A codec for fixed sized int block encoders. The int encoder + * used here writes each block as data encoded by PForDelta. 
+ */ + +public class PForDeltaFixedIntBlockCodec extends Codec { + + private final int blockSize; + + public PForDeltaFixedIntBlockCodec(int blockSize) { + this.blockSize = blockSize; + name = "PatchedFrameOfRef2"; + } + + @Override + public String toString() { + return name + "(blockSize=" + blockSize + ")"; + } + + /** + * Encode a block of integers using PForDelta. + * @param block the input block to be compressed + * @param elementNum the number of elements in the block to be compressed + * @return the compressed block, as an array of ints holding the PForDelta-encoded data + * @throws Exception + */ + int[] encodeOneBlockWithPForDelta(final int[] block, int elementNum) // throws Exception + { + assert block != null && block.length > 0; + /* + if(block == null || block.length == 0) + { + throw new Exception("input block is empty"); + } + */ + + final int[] compressedBlock = PForDelta.compressOneBlock(block, elementNum); + assert compressedBlock != null; + + //if(compressedBlock == null) + //{ + //throw new Exception("compressed buffer is null"); + //} + return compressedBlock; + } + + /** + * Decode a block of compressed data (using PForDelta) into a block of elementNum uncompressed integers + * @param block the input block to be decompressed + * @param elementNum the number of elements in the decompressed block + */ + void decodeOneBlockWithPForDelta(final int[] block, int elementNum, final int[] output) + { + int[] decompressedBlock = PForDelta.decompressOneBlock(block, elementNum); + System.arraycopy(decompressedBlock, 0, output, 0, decompressedBlock.length); + } + + + public IntStreamFactory getIntFactory() { + return new PForDeltaIntFactory(); + } + + private class PForDeltaIntFactory extends IntStreamFactory { + + @Override + public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException { + return new FixedIntBlockIndexInput(dir.openInput(fileName, readBufferSize)) { + + @Override + protected BlockReader 
getBlockReader(final IndexInput in, final int[] buffer) throws IOException { + return new BlockReader() { + // nocommit fixed size: + private final int[] compressedData = new int[256]; + public void seek(long pos) {} + public void readBlock() throws IOException { + if(buffer != null) + { + // retrieve the compressed size in ints + final int compressedSizeInInt = in.readInt(); + // read the compressed data (compressedSizeInInt ints) + for(int i=0;i files) { + SepPostingsReaderImpl.files(segmentInfo, codecId, files); + PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + } + + @Override + public void getExtensions(Set extensions) { + SepPostingsWriterImpl.getExtensions(extensions); + PrefixCodedTermsReader.getExtensions(extensions); + FixedGapTermsIndexReader.getIndexExtensions(extensions); + } +} Property changes on: lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (revision 1050843) +++ lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (working copy) @@ -29,6 +29,7 @@ import org.apache.lucene.index.codecs.standard.StandardCodec; import org.apache.lucene.index.codecs.pfordelta.PatchedFrameOfRefCodec; import org.apache.lucene.index.codecs.pfordelta.FrameOfRefCodec; +import org.apache.lucene.index.codecs.pfordelta2.PForDeltaFixedIntBlockCodec; /** Holds a set of codecs, keyed by name. 
You subclass * this, instantiate it, and register your codecs, then @@ -49,7 +50,7 @@ private final Set knownExtensions = new HashSet(); - public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef"}; + public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2"}; public synchronized void register(Codec codec) { if (codec.name == null) { @@ -174,5 +175,6 @@ register(new SimpleTextCodec()); register(new PatchedFrameOfRefCodec()); register(new FrameOfRefCodec()); + register(new PForDeltaFixedIntBlockCodec(128)); } } Index: lucene/src/java/org/apache/lucene/util/pfor/PForCompress.java =================================================================== --- lucene/src/java/org/apache/lucene/util/pfor/PForCompress.java (revision 1050843) +++ lucene/src/java/org/apache/lucene/util/pfor/PForCompress.java (working copy) @@ -18,6 +18,7 @@ // nocommit need low level unit tests for this // nocommit break out decompress seperately? +// nocommit merge w/ pfor2 before landing to trunk! import java.util.Arrays; Index: lucene/src/java/org/apache/lucene/util/pfor2/Simple16.java =================================================================== --- lucene/src/java/org/apache/lucene/util/pfor2/Simple16.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/pfor2/Simple16.java (revision 0) @@ -0,0 +1,123 @@ +package org.apache.lucene.util.pfor2; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Implementation of the Simple16 algorithm for sorted integer arrays. The basic ideas are based on papers from + * + * 1. http://www2008.org/papers/pdf/p387-zhangA.pdf + * + * 2. http://www2009.org/proceedings/pdf/p401.pdf + * + */ + +public class Simple16 { + + private static final int S16_NUMSIZE = 16; + private static final int S16_BITSSIZE = 28; + // the number of integers packed into one compressed int for each of the 16 selectors + private static final int[] S16_NUM = {28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1}; + // for each selector, the number of bits assigned to each packed integer (0 = unused slot) + private static final int[][] S16_BITS = { {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, + {2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0}, + {1,1,1,1,1,1,1,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,0,0,0,0,0,0}, + {1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,0,0,0,0,0,0,0}, + {2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {4,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {3,4,4,4,4,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {5,5,5,5,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {4,4,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {6,6,6,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {5,5,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {7,7,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {10,9,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {14,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + 
{28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} }; + + /** + * Compress an integer array using Simple16 + * + * @param out the compressed output + * @param outOffset the offset of the output in the number of integers + * @param in the integer input array + * @param inOffset the offset of the input in the number of integers + * @param n the number of elements to be compressed + * @return the number of compressed integers + */ + public static final int s16Compress(int[] out, int outOffset, int[] in, int inOffset, int n, int blockSize, int oriBlockSize, int[] oriInputBlock) + { + int numIdx, j, num, bits; + for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) + { + out[outOffset] = numIdx<>>S16_BITSSIZE; + int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; + for(j=0, bits=0; j>> inWithIntOffset); + return val & (0xffffffff >>> (32 - bits)); + } + } Property changes on: lucene/src/java/org/apache/lucene/util/pfor2/Simple16.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/util/pfor2/PForDelta.java =================================================================== --- lucene/src/java/org/apache/lucene/util/pfor2/PForDelta.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/pfor2/PForDelta.java (revision 0) @@ -0,0 +1,349 @@ +package org.apache.lucene.util.pfor2; + + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +/** + * Implementation of the optimized PForDelta algorithm for sorted integer arrays. The basic ideas are based on + * + * 1. Original algorithm from + * http://homepages.cwi.nl/~heman/downloads/msthesis.pdf + * + * 2. Optimization and + * variation from http://www2008.org/papers/pdf/p387-zhangA.pdf + * + * 3. Further optimization + * http://www2009.org/proceedings/pdf/p401.pdf + * + * As a part of the PForDelta implementation, Simple16 is used to compress exceptions. The original Simple16 algorithm can also be found in the above literature. 
+ * @author hao yan, hyan2008@gmail.com + */ +// nocommit -- must merge our 2 pfor impls before landing on trunk +public class PForDelta{ + + //All possible values of b in the PForDelta algorithm + private static final int[] POSSIBLE_B = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,20,28}; + // Max number of bits to store an uncompressed value + private static final int MAX_BITS = 32; + // Header records the value of b and the number of exceptions in the block + private static final int HEADER_NUM = 1; + // Header size in bits + private static final int HEADER_SIZE = MAX_BITS * HEADER_NUM; + + private static final int[] MASK = {0x00000000, + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, + 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, + 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, + 0x7fffffff, 0xffffffff}; + + /** + * Compress one block of blockSize integers using PForDelta with the optimal parameter b + * @param inBlock the block to be compressed + * @param blockSize the block size + * @return the compressed block + */ + public static int[] compressOneBlock(final int[] inBlock, int blockSize) + { + // find the best b that can lead to the smallest overall compressed size + int currentB = POSSIBLE_B[0]; + int tmpB = currentB; + int optSize = estimateCompressedSize(inBlock, tmpB, blockSize); + for (int i = 1; i < POSSIBLE_B.length; ++i) + { + tmpB = POSSIBLE_B[i]; + int curSize = estimateCompressedSize(inBlock, tmpB, blockSize); + if(curSize < optSize) + { + currentB = tmpB; + optSize = curSize; + } + } + + // compress the block using the above best b + int[] outBlock = compressOneBlockCore(inBlock, currentB, blockSize); + + return outBlock; + } + + /** + * Decompress one block using PForDelta + * @param inBlock the block to be decompressed + * @param 
blockSize the number of elements in the decompressed block + * @return the decompressed block + */ + public static int[] decompressOneBlock(int[] inBlock, int blockSize) + { + int[] expPos = new int[blockSize]; + int[] expHighBits = new int[blockSize]; + int[] outBlock = new int[blockSize]; + assert inBlock != null; + /* + if(inBlock == null) + { + System.out.println("error: compBlock is null"); + return null; + } + */ + + int expNum = inBlock[0] & 0x3ff; + int bits = (inBlock[0]>>>10) & (0x1f); + + // decompress the b-bit slots + int offset = HEADER_SIZE; + int compressedBits = 0; + if(bits == 0) + { + Arrays.fill(outBlock,0); + } + else + { + compressedBits = decompressBBitSlots(outBlock, inBlock, blockSize, bits); + } + offset += compressedBits; + + // decompress exceptions + if(expNum>0) + { + compressedBits = decompressBlockByS16(expPos, inBlock, offset, expNum); + offset += compressedBits; + compressedBits = decompressBlockByS16(expHighBits, inBlock, offset, expNum); + offset += compressedBits; + + for (int i = 0; i < expNum; i++) + { + int curExpPos = expPos[i] ; + int curHighBits = expHighBits[i]; + outBlock[curExpPos] = (outBlock[curExpPos] & MASK[bits]) | ((curHighBits & MASK[32-bits] ) << bits); + } + } + return outBlock; + } + + /** + * Estimate the compressed size in ints of a block + * @param inputBlock the block to be compressed + * @param bits the value of the parameter b + * @param blockSize the block size + * @return the compressed size in ints + * @throws IllegalArgumentException + */ + private static int estimateCompressedSize(int[] inputBlock, int bits, int blockSize) throws IllegalArgumentException { + int maxNoExp = (1< maxNoExp) + { + expNum++; + } + } + outputOffset += (expNum<<5); + + return outputOffset; + } + + /** + * The core implementation of compressing a block with blockSize integers using PForDelta with the given parameter b + * @param inputBlock the block to be compressed + * @param bits the value of the parameter b + * @param 
blockSize the block size + * @return the compressed block + * @throws IllegalArgumentException + */ + private static int[] compressOneBlockCore(int[] inputBlock, int bits, int blockSize) throws IllegalArgumentException { + int[] expPos = new int[blockSize]; + int[] expHighBits = new int[blockSize]; + + int maxCompBitSize = HEADER_SIZE + blockSize * (MAX_BITS + MAX_BITS + MAX_BITS) + 32; + int[] tmpCompressedBlock = new int[(maxCompBitSize>>>5)]; + + int outputOffset = HEADER_SIZE; + int expUpperBound = 1<= 0: "input value is " + inputBlock[i]; + /* + if(inputBlock[i] < 0) + { + System.out.println("haha<0: [" + i +"]" + inputBlock[i]); + } + */ + if (inputBlock[i] < expUpperBound) + { + writeBits(tmpCompressedBlock, inputBlock[i], outputOffset, bits); + } + else // exp + { + // store the lower bits-bits of the exception + writeBits(tmpCompressedBlock, inputBlock[i] & MASK[bits], outputOffset, bits); + // write the position of exception + expPos[expNum] = i; + // write the higher 32-bits bits of the exception + expHighBits[expNum] = (inputBlock[i] >>> bits) & MASK[32-bits]; + expNum++; + } + outputOffset += bits; + } + + // the first int in the compressed block stores the value of b and the number of exceptions + tmpCompressedBlock[0] = ((bits & MASK[10]) << 10) | (expNum & 0x3ff); + + // compress exceptions + if(expNum>0) + { + int compressedBitSize = compressBlockByS16(tmpCompressedBlock, outputOffset, expPos, expNum, blockSize, inputBlock); + outputOffset += compressedBitSize; + compressedBitSize = compressBlockByS16(tmpCompressedBlock, outputOffset, expHighBits, expNum, blockSize, inputBlock); + outputOffset += compressedBitSize; + } + + // discard the redundant parts in the tmpCompressedBlock + int compressedSizeInInts = (outputOffset+31)>>>5; + int[] compBlock; + compBlock = new int[compressedSizeInInts]; + System.arraycopy(tmpCompressedBlock,0, compBlock, 0, compressedSizeInInts); + + return compBlock; + } + + /** + * Decompress b-bit slots + * @param 
outDecompSlots decompressed block which is the output + * @param inCompBlock the compressed block which is the input + * @param blockSize the block size + * @param bits the value of the parameter b + * @return the compressed size in bits of the data that has been decompressed + */ + private static int decompressBBitSlots(int[] outDecompSlots, int[] inCompBlock, int blockSize, int bits) + { + int compressedBitSize = 0; + int offset = HEADER_SIZE; + for(int i =0; i>>5; + int num, inOffset=0, numLeft; + for(numLeft=blockSize; numLeft>0; numLeft -= num) + { + num = Simple16.s16Compress(outCompBlock, outOffset, inBlock, inOffset, numLeft, blockSize, oriBlockSize, oriInputBlock); + assert num >= 0; + /* + if(num<0) + { + System.out.println("oops: s16 get -1 "); + } + */ + outOffset++; + inOffset += num; + } + int compressedBitSize = (outOffset<<5)-outStartOffsetInBits; + return compressedBitSize; + } + + /** + * Decompress a block of blockSize integers using Simple16 algorithm + * @param outDecompBlock the decompressed block which is the output + * @param inCompBlock the compressed block which is the input + * @param blockSize the block size + * @param inStartOffsetInBits the start offset in bits of the compressed block + * @return the compressed size in bits of the data that has been decompressed + */ + private static int decompressBlockByS16(int[] outDecompBlock, int[] inCompBlock, int inStartOffsetInBits, int blockSize) + { + int inOffset = (inStartOffsetInBits+31)>>>5; + int num, outOffset=0, numLeft; + for(numLeft=blockSize; numLeft>0; numLeft -= num) + { + num = Simple16.s16Decompress(outDecompBlock, outOffset, inCompBlock, inOffset, numLeft); + outOffset += num; + inOffset++; + } + int compressedBitSize = (inOffset<<5)-inStartOffsetInBits; + return compressedBitSize; + } + + + /** + * Write a certain number of bits of an integer into an integer array starting from the given start offset + * + * @param out the output array + * @param val the integer to be written + 
* @param outOffset the start offset in bits in the output array + * @param bits the number of bits to be written (bits>=0) + */ + private static final void writeBits(int[] out, int val, int outOffset, int bits) { + if(bits == 0) + return; + final int index = outOffset >>> 5; + final int skip = outOffset & 0x1f; + val &= (0xffffffff >>> (32 - bits)); + out[index] |= (val << skip); + if (32 - skip < bits) { + out[index + 1] |= (val >>> (32 - skip)); + } + } + + /** + * Read a certain number of bits from an integer array, starting from the given start offset, and return them as an integer + * + * @param in the input array + * @param val the integer to be read + * @param inOffset the start offset in bits in the input array + * @param bits the number of bits to be read; unlike writeBits(), readBits() does not deal with bits==0 and thus bits must be > 0. When bits==0, the calling functions will just skip the entire bits-bit slots without decoding them + * @return the value held in the bits-bit slot that starts at inOffset + */ + private static final int readBits(int[] in, final int inOffset, final int bits) { + final int index = inOffset >>> 5; + final int skip = inOffset & 0x1f; + int val = in[index] >>> skip; + if (32 - skip < bits) { + val |= (in[index + 1] << (32 - skip)); + } + return val & (0xffffffff >>> (32 - bits)); + } + +} + + Property changes on: lucene/src/java/org/apache/lucene/util/pfor2/PForDelta.java ___________________________________________________________________ Added: svn:eol-style + native