Index: src/test/org/apache/lucene/util/packed/TestPackedInts.java =================================================================== --- src/test/org/apache/lucene/util/packed/TestPackedInts.java (revision 0) +++ src/test/org/apache/lucene/util/packed/TestPackedInts.java (revision 0) @@ -0,0 +1,196 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.*; +import org.apache.lucene.util.LuceneTestCase; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.io.IOException; + +public class TestPackedInts extends LuceneTestCase { + + private Random rnd; + + public void testBitsRequired() throws Exception { + assertEquals(61, PackedInts.bitsRequired((long)Math.pow(2, 61)-1)); + assertEquals(61, PackedInts.bitsRequired(0x1FFFFFFFFFFFFFFFL)); + assertEquals(62, PackedInts.bitsRequired(0x3FFFFFFFFFFFFFFFL)); + assertEquals(63, PackedInts.bitsRequired(0x7FFFFFFFFFFFFFFFL)); + } + + public void testMaxValues() throws Exception { + assertEquals("1 bit -> max == 1", + 1, PackedInts.maxValue(1)); + assertEquals("2 bit -> max == 3", + 3, PackedInts.maxValue(2)); + assertEquals("8 bit -> max == 255", + 255, PackedInts.maxValue(8)); + assertEquals("63 bit -> max == Long.MAX_VALUE", + Long.MAX_VALUE, PackedInts.maxValue(63)); + assertEquals("64 bit -> max == Long.MAX_VALUE (same as for 63 bit)", + Long.MAX_VALUE, PackedInts.maxValue(64)); + } + + public void testPackedInts() throws IOException { + rnd = newRandom(); + for(int iter=0;iter<5;iter++) { + long ceil = 2; + for(int nbits=1;nbits<63;nbits++) { + final int valueCount = 100+rnd.nextInt(500); + final Directory d = new MockRAMDirectory(); + + IndexOutput out = d.createOutput("out.bin"); + PackedInts.Writer w = PackedInts.getWriter( + out, valueCount, nbits); + + final long[] values = new long[valueCount]; + for(int i=0;i packedInts = + createPackedInts(VALUE_COUNT, BITS_PER_VALUE); + for (PackedInts.Mutable packedInt: packedInts) { + for (int i = 0 ; i < packedInt.size() ; i++) { + packedInt.set(i, i+1); + } + } + assertListEquality(packedInts); + } + + public void xxxtestRandomEquality() { + final int[] VALUE_COUNTS = new int[]{0, 1, 5, 8, 100, 500}; + final int MIN_BITS_PER_VALUE = 1; + final int MAX_BITS_PER_VALUE = 64; + + rnd = newRandom(); + + for (int valueCount: VALUE_COUNTS) { + for (int 
bitsPerValue = MIN_BITS_PER_VALUE ; + bitsPerValue <= MAX_BITS_PER_VALUE ; + bitsPerValue++) { + assertRandomEquality(valueCount, bitsPerValue, rnd.nextLong()); + } + } + } + + private void assertRandomEquality(int valueCount, int bitsPerValue, long randomSeed) { + List packedInts = createPackedInts(valueCount, bitsPerValue); + for (PackedInts.Mutable packedInt: packedInts) { + try { + fill(packedInt, (long)(Math.pow(2, bitsPerValue)-1), randomSeed); + } catch (Exception e) { + e.printStackTrace(System.err); + fail(String.format( + "Exception while filling %s: valueCount=%d, bitsPerValue=%s", + packedInt.getClass().getSimpleName(), + valueCount, bitsPerValue)); + } + } + assertListEquality(packedInts); + } + + private List createPackedInts( + int valueCount, int bitsPerValue) { + List packedInts = new ArrayList(); + if (bitsPerValue <= 8) { + packedInts.add(new Direct8(valueCount)); + } + if (bitsPerValue <= 16) { + packedInts.add(new Direct16(valueCount)); + } + if (bitsPerValue <= 31) { + packedInts.add(new Packed32(valueCount, bitsPerValue)); + } + if (bitsPerValue <= 32) { + packedInts.add(new Direct32(valueCount)); + } + if (bitsPerValue <= 63) { + packedInts.add(new Packed64(valueCount, bitsPerValue)); + } + packedInts.add(new Direct64(valueCount)); + return packedInts; + } + + private void fill(PackedInts.Mutable packedInt, long maxValue, long randomSeed) { + Random rnd2 = new Random(randomSeed); + maxValue++; + for (int i = 0 ; i < packedInt.size() ; i++) { + long value = Math.abs(rnd2.nextLong() % maxValue); + packedInt.set(i, value); + assertEquals(String.format( + "The set/get of the value at index %d should match for %s", + i, packedInt.getClass().getSimpleName()), + value, packedInt.get(i)); + } + } + + private void assertListEquality( + List packedInts) { + assertListEquality("", packedInts); + } + + private void assertListEquality( + String message, List packedInts) { + if (packedInts.size() == 0) { + return; + } + PackedInts.Reader base = 
packedInts.get(0); + int valueCount = base.size(); + for (PackedInts.Reader packedInt: packedInts) { + assertEquals(message + ". The number of values should be the same ", + valueCount, packedInt.size()); + } + for (int i = 0 ; i < valueCount ; i++) { + for (int j = 1 ; j < packedInts.size() ; j++) { + assertEquals(String.format( + "%s. The value at index %d should be the same for %s and %s", + message, i, base.getClass().getSimpleName(), + packedInts.get(j).getClass().getSimpleName()), + base.get(i), packedInts.get(j).get(i)); + } + } + } +} Property changes on: src/test/org/apache/lucene/util/packed/TestPackedInts.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.util.CodecUtil; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; import org.apache.lucene.store.IndexOutput; @@ -133,7 +134,7 @@ @Override public void start(IndexOutput termsOut) throws IOException { this.termsOut = termsOut; - Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); termsOut.writeVInt(pendingDocs.length); wrappedPostingsWriter.start(termsOut); } Index: src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (revision 921005) +++ 
src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** Concrete class that reads the current doc/freq/skip * postings format @@ -51,7 +52,7 @@ @Override public void init(IndexInput termsIn) throws IOException { - Codec.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC, PulsingPostingsWriterImpl.VERSION_START); + CodecUtil.checkHeader(termsIn, PulsingPostingsWriterImpl.CODEC, PulsingPostingsWriterImpl.VERSION_START); maxPulsingDocFreq = termsIn.readVInt(); wrappedPostingsReader.init(termsIn); } Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (working copy) @@ -32,6 +32,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** Concrete class that reads the current doc/freq/skip * postings format. 
@@ -95,7 +96,7 @@ @Override public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching past writer - Codec.checkHeader(termsIn, SepPostingsWriterImpl.CODEC, SepPostingsWriterImpl.VERSION_START); + CodecUtil.checkHeader(termsIn, SepPostingsWriterImpl.CODEC, SepPostingsWriterImpl.VERSION_START); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); } Index: src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexInput.java (working copy) @@ -22,6 +22,7 @@ import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.CodecUtil; /** Reads IndexInputs written with {@link * SingleIntIndexoutput}. NOTE: this class is just for @@ -36,7 +37,7 @@ public SingleIntIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException { in = dir.openInput(fileName, readBufferSize); - Codec.checkHeader(in, SingleIntIndexOutput.CODEC, SingleIntIndexOutput.VERSION_START); + CodecUtil.checkHeader(in, SingleIntIndexOutput.CODEC, SingleIntIndexOutput.VERSION_START); } @Override Index: src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/sep/SingleIntIndexOutput.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Directory; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.CodecUtil; import java.io.IOException; @@ -36,7 +37,7 @@ public SingleIntIndexOutput(Directory dir, String fileName) 
throws IOException { out = dir.createOutput(fileName); - Codec.writeHeader(out, CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT); } /** Write an int to the primary file */ Index: src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** Writes frq to .frq, docs to .doc, pos to .pos, payloads * to .pyl, skip data to .skp @@ -126,7 +127,7 @@ @Override public void start(IndexOutput termsOut) throws IOException { this.termsOut = termsOut; - Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); // TODO: -- just ask skipper to "start" here termsOut.writeInt(skipInterval); // write skipInterval termsOut.writeInt(maxSkipLevels); // write maxSkipLevels Index: src/java/org/apache/lucene/index/codecs/Codec.java =================================================================== --- src/java/org/apache/lucene/index/codecs/Codec.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/Codec.java (working copy) @@ -20,21 +20,16 @@ import java.io.IOException; import java.util.Set; -import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; /** @lucene.experimental */ public abstract class Codec { public static boolean DEBUG = false; - private static 
final int CODEC_HEADER = 0x1af65; - /** Unique name that's used to retrieve this codec when * reading the index */ public String name; @@ -64,42 +59,4 @@ /** Records all file extensions this codec uses */ public abstract void getExtensions(Set extensions); - - /** @return Actual version of the file */ - public static int checkHeader(IndexInput in, String codec, int version) throws IOException { - - // Safety to guard against reading a bogus string: - int header = in.readInt(); - if (header != CODEC_HEADER) { - throw new CorruptIndexException("codec header mismatch: " + header + " vs " + CODEC_HEADER); - } - - final String actualCodec = in.readString(); - if (!codec.equals(actualCodec)) { - throw new CorruptIndexException("codec mismatch: expected '" + codec + "' but got '" + actualCodec + "'"); - } - - int actualVersion = in.readInt(); - if (actualVersion > version) { - throw new CorruptIndexException("version '" + actualVersion + "' is too new (expected <= '" + version + "'"); - } - - return actualVersion; - } - - public static void writeHeader(IndexOutput out, String codec, int version) throws IOException { - final long start = out.getFilePointer(); - out.writeInt(CODEC_HEADER); - out.writeString(codec); - out.writeInt(version); - - // So we can easily compute headerSize (below) - if (out.getFilePointer()-start != codec.length() + 9) { - throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); - } - } - - public static int headerSize(String codec) { - return 9 + codec.length(); - } } Index: src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexReader.java (working copy) @@ -27,6 +27,7 @@ import 
org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.CodecUtil; import java.util.HashMap; import java.util.Iterator; @@ -84,7 +85,7 @@ boolean success = false; try { - Codec.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START); + CodecUtil.checkHeader(in, SimpleStandardTermsIndexWriter.CODEC_NAME, SimpleStandardTermsIndexWriter.VERSION_START); if (Codec.DEBUG) { Codec.debug(" sstir init: header tii.fp=" + in.getFilePointer()); Index: src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (working copy) @@ -41,6 +41,7 @@ import org.apache.lucene.util.cache.Cache; import org.apache.lucene.util.cache.DoubleBarrelLRUCache; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** Handles a terms dict, but decouples all details of * doc/freqs/positions reading to an instance of {@link @@ -113,7 +114,7 @@ boolean success = false; try { - Codec.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT); + CodecUtil.checkHeader(in, StandardTermsDictWriter.CODEC_NAME, StandardTermsDictWriter.VERSION_CURRENT); final long dirOffset = in.readLong(); Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriterImpl.java (working copy) @@ -29,6 +29,7 @@ import org.apache.lucene.index.CorruptIndexException; import 
org.apache.lucene.index.codecs.Codec; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** @lucene.experimental */ public final class StandardPostingsWriterImpl extends StandardPostingsWriter { @@ -89,7 +90,7 @@ @Override public void start(IndexOutput termsOut) throws IOException { this.termsOut = termsOut; - Codec.writeHeader(termsOut, CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT); termsOut.writeInt(skipInterval); // write skipInterval termsOut.writeInt(maxSkipLevels); // write maxSkipLevels } Index: src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/SimpleStandardTermsIndexWriter.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.CodecUtil; import java.util.List; import java.util.ArrayList; @@ -51,7 +52,7 @@ this.segment = state.segmentName; termIndexInterval = state.termIndexInterval; out = state.directory.createOutput(indexFileName); - Codec.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); fieldInfos = state.fieldInfos; // Placeholder for dir offset @@ -124,10 +125,10 @@ out.writeInt(field.numIndexTerms); out.writeLong(field.indexStart); } - out.seek(Codec.headerSize(CODEC_NAME)); + out.seek(CodecUtil.headerLength(CODEC_NAME)); out.writeLong(dirStart); if (Codec.DEBUG) { - System.out.println(" writeDirStart " + dirStart + " @ " + Codec.headerSize(CODEC_NAME)); + System.out.println(" writeDirStart " + dirStart + " @ " + CodecUtil.headerLength(CODEC_NAME)); } out.close(); } Index: 
src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictWriter.java (working copy) @@ -32,6 +32,7 @@ import org.apache.lucene.index.codecs.PostingsConsumer; import org.apache.lucene.index.codecs.TermsConsumer; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.CodecUtil; /** * Writes terms dict and interacts with docs/positions @@ -81,7 +82,7 @@ fieldInfos = state.fieldInfos; // Count indexed fields up front - Codec.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); out.writeLong(0); // leave space for end index pointer @@ -129,7 +130,7 @@ if (Codec.DEBUG) System.out.println("stdw.close: field=" + field.fieldInfo.name + " numTerms=" + field.numTerms + " tis pointer=" + field.termsStartPointer); } - out.seek(Codec.headerSize(CODEC_NAME)); + out.seek(CodecUtil.headerLength(CODEC_NAME)); out.writeLong(dirStart); } finally { try { Index: src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java =================================================================== --- src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReaderImpl.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CodecUtil; /** Concrete class that reads the current doc/freq/skip * postings format. 
@@ -73,7 +74,7 @@ public void init(IndexInput termsIn) throws IOException { // Make sure we are talking to the matching past writer - Codec.checkHeader(termsIn, StandardPostingsWriterImpl.CODEC, StandardPostingsWriterImpl.VERSION_START); + CodecUtil.checkHeader(termsIn, StandardPostingsWriterImpl.CODEC, StandardPostingsWriterImpl.VERSION_START); skipInterval = termsIn.readInt(); maxSkipLevels = termsIn.readInt(); Index: src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexInput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexInput.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexInput.java (working copy) @@ -21,7 +21,7 @@ * expected to give poor performance; it's really only for * testing the pluggability. One should typically use pfor instead. */ -import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.CodecUtil; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; @@ -37,7 +37,7 @@ public SimpleIntBlockIndexInput(Directory dir, String fileName, int readBufferSize) throws IOException { IndexInput in = dir.openInput(fileName, readBufferSize); - Codec.checkHeader(in, SimpleIntBlockIndexOutput.CODEC, SimpleIntBlockIndexOutput.VERSION_START); + CodecUtil.checkHeader(in, SimpleIntBlockIndexOutput.CODEC, SimpleIntBlockIndexOutput.VERSION_START); init(in); } Index: src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexOutput.java =================================================================== --- src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexOutput.java (revision 921005) +++ src/java/org/apache/lucene/index/codecs/intblock/SimpleIntBlockIndexOutput.java (working copy) @@ -21,7 +21,7 @@ * expected to give poor performance; it's really only for * testing the pluggability. One should typically use pfor instead. 
*/ -import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.util.CodecUtil; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexOutput; @@ -41,7 +41,7 @@ public SimpleIntBlockIndexOutput(Directory dir, String fileName, int blockSize) throws IOException { IndexOutput out = dir.createOutput(fileName); - Codec.writeHeader(out, CODEC, VERSION_CURRENT); + CodecUtil.writeHeader(out, CODEC, VERSION_CURRENT); init(out, blockSize); } Index: src/java/org/apache/lucene/store/IndexInput.java =================================================================== --- src/java/org/apache/lucene/store/IndexInput.java (revision 921005) +++ src/java/org/apache/lucene/store/IndexInput.java (working copy) @@ -67,6 +67,13 @@ readBytes(b, offset, len); } + /** Reads two bytes and returns a short. + * @see IndexOutput#writeByte(byte) + */ + public short readShort() throws IOException { + return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF)); + } + /** Reads four bytes and returns an int. * @see IndexOutput#writeInt(int) */ Index: src/java/org/apache/lucene/util/CodecUtil.java =================================================================== --- src/java/org/apache/lucene/util/CodecUtil.java (revision 0) +++ src/java/org/apache/lucene/util/CodecUtil.java (revision 0) @@ -0,0 +1,72 @@ +package org.apache.lucene.util; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.index.CorruptIndexException; + +import java.io.IOException; + +/** + * @lucene.experimental + */ + +public final class CodecUtil { + private final static int CODEC_MAGIC = 0x3fd76c17; + + public static void writeHeader(IndexOutput out, String codec, int version) + throws IOException { + final long start = out.getFilePointer(); + out.writeInt(CODEC_MAGIC); + out.writeString(codec); + out.writeInt(version); + + // We require this so we can easily pre-compute header length + if (out.getFilePointer()-start != codec.length()+9) { + throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]"); + } + } + + public static int headerLength(String codec) { + return 9+codec.length(); + } + + public static int checkHeader(IndexInput in, String codec, int maxVersion) + throws IOException { + + // Safety to guard against reading a bogus string: + final int actualHeader = in.readInt(); + if (actualHeader != CODEC_MAGIC) { + throw new CorruptIndexException("codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CODEC_MAGIC); + } + + final String actualCodec = in.readString(); + if (!actualCodec.equals(codec)) { + throw new CorruptIndexException("codec mismatch: actual codec=" + actualCodec + " vs expected codec=" + codec); + } + + final int actualVersion = in.readInt(); + if (actualVersion > maxVersion) { + throw new CorruptIndexException("version " + 
actualVersion + " is too new (expected <= version " + maxVersion + ")"); + } + + return actualVersion; + } +} Property changes on: src/java/org/apache/lucene/util/CodecUtil.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Direct32.java =================================================================== --- src/java/org/apache/lucene/util/packed/Direct32.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Direct32.java (revision 0) @@ -0,0 +1,82 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Direct wrapping of 32 bit values to a backing array of ints. 
+ */ + +class Direct32 extends PackedInts.ReaderImpl + implements PackedInts.Mutable { + private int[] blocks; + private static final int BITS_PER_VALUE = 32; + + public Direct32(int valueCount) { + super(valueCount, BITS_PER_VALUE); + blocks = new int[valueCount]; + } + + public Direct32(IndexInput in, int valueCount) throws IOException { + super(valueCount, BITS_PER_VALUE); + int[] blocks = new int[valueCount]; + for(int i=0;i

+ * Note: The blocks are used directly, so changes to the given block will + * affect the structure. + * @param blocks used as the internal backing array. + */ + public Direct32(int[] blocks) { + super(blocks.length, BITS_PER_VALUE); + this.blocks = blocks; + } + + public long get(final int index) { + return 0xFFFFFFFFL & blocks[index]; + } + + public void set(final int index, final long value) { + blocks[index] = (int)(value & 0xFFFFFFFF); + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + blocks.length * RamUsageEstimator.NUM_BYTES_INT; + } + + public void clear() { + Arrays.fill(blocks, 0); + } +} Property changes on: src/java/org/apache/lucene/util/packed/Direct32.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Packed64.java =================================================================== --- src/java/org/apache/lucene/util/packed/Packed64.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Packed64.java (revision 0) @@ -0,0 +1,210 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Space optimized random access capable array of values with a fixed number of + * bits. For 32 bits/value and less, performance on 32 bit machines is not + * optimal. Consider using {@link Packed32} for such a setup. + *

+ * The implementation strives to avoid conditionals and expensive operations, + * sacrificing code clarity to achieve better performance. + */ + +class Packed64 extends PackedInts.ReaderImpl implements PackedInts.Mutable { + static final int BLOCK_SIZE = 64; // 32 = int, 64 = long + static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE + static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE + + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; + private static final int FAC_BITPOS = 3; + + /* + * In order to make an efficient value-getter, conditionals should be + * avoided. A value can be positioned inside of a block, requiring shifting + * left or right or it can span two blocks, requiring a left-shift on the + * first block and a right-shift on the right block. + *

+ * By always shifting the first block both left and right, we get exactly + * the right bits. By always shifting the second block right and applying + * a mask, we get the right bits there. After that, we | the two bitsets. + */ + private static final int[][] SHIFTS = + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; + //new int[BLOCK_SIZE+1][BLOCK_SIZE][BLOCK_SIZE+1]; + private static final long[][] MASKS = new long[ENTRY_SIZE][ENTRY_SIZE]; + + static { // Generate shifts + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { + int[] currentShifts = SHIFTS[elementBits]; + int base = bitPos * FAC_BITPOS; + currentShifts[base ] = bitPos; + currentShifts[base + 1] = BLOCK_SIZE - elementBits; + if (bitPos <= BLOCK_SIZE - elementBits) { // Single block + currentShifts[base + 2] = 0; + MASKS[elementBits][bitPos] = 0; + } else { // Two blocks + int rBits = elementBits - (BLOCK_SIZE - bitPos); + currentShifts[base + 2] = BLOCK_SIZE - rBits; + MASKS[elementBits][bitPos] = ~(~0L << rBits); + } + } + } + } + + /* + * The setter requires more masking than the getter. + */ + private static final long[][] WRITE_MASKS = + new long[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; + static { + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { + long elementPosMask = ~(~0L << elementBits); + int[] currentShifts = SHIFTS[elementBits]; + long[] currentMasks = WRITE_MASKS[elementBits]; + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { + int base = bitPos * FAC_BITPOS; + currentMasks[base ] =~((elementPosMask + << currentShifts[base + 1]) + >>> currentShifts[base]); + currentMasks[base+1] = ~(elementPosMask + << currentShifts[base + 2]); + currentMasks[base+2] = currentShifts[base + 2] == 0 ? 
0 : ~0; + } + } + } + + /* The bits */ + private long[] blocks; + + // Cached calculations + private int maxPos; // blocks.length * BLOCK_SIZE / elementBits - 1 + private int[] shifts; // The shifts for the current elementBits + private long[] readMasks; + private long[] writeMasks; + + /** + * Creates an array with the internal structures adjusted for the given + * limits and initialized to 0. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + */ + public Packed64(int valueCount, int bitsPerValue) { + // TODO: Test for edge-cases (2^31 values, 63 bitsPerValue) + // +2 due to the avoid-conditionals-trick. The last entry is always 0 + this(new long[(int)((long)valueCount * bitsPerValue / BLOCK_SIZE + 2)], + valueCount, bitsPerValue); + } + + + /** + * Creates an array backed by the given blocks. + *

+ * Note: The blocks are used directly, so changes to the given block will + * affect the Packed64-structure. + * @param blocks used as the internal backing array. Note that the last + * element cannot be addressed directly. + * @param valueCount the number of values. + * @param bitsPerValue the number of bits available for any given value. + */ + public Packed64(long[] blocks, int valueCount, int bitsPerValue) { + super(valueCount, bitsPerValue); + this.blocks = blocks; + updateCached(); + } + + /** + * Creates an array with content retrieved from the given IndexInput. + * @param in an IndexInput, positioned at the start of Packed64-content. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @throws java.io.IOException if the values for the backing array could not + * be retrieved. + */ + public Packed64(IndexInput in, int valueCount, int bitsPerValue) + throws IOException { + super(valueCount, bitsPerValue); + int size = size(valueCount, bitsPerValue); + blocks = new long[size+1]; // +1 due to non-conditional tricks + for(int i=0;i>> BLOCK_BITS); // / BLOCK_SIZE + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); + + final int base = bitPos * FAC_BITPOS; + + return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | + ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); + } + + public void set(final int index, final long value) { + final long majorBitPos = (long)index * bitsPerValue; // widen before multiplying: the int product overflows for large indexes + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); + final int base = bitPos * FAC_BITPOS; + + blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) + | (value << shifts[base + 1] >>> shifts[base]); + blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) + | ((value << shifts[base + 2]) & writeMasks[base+2]); + } + + public String toString() { + return 
"Packed64(bitsPerValue=" + bitsPerValue + ", size=" + + size() + ", maxPos=" + maxPos + + ", elements.length=" + blocks.length + ")"; + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + blocks.length * RamUsageEstimator.NUM_BYTES_LONG; + } + + public void clear() { + Arrays.fill(blocks, 0L); + } +} Property changes on: src/java/org/apache/lucene/util/packed/Packed64.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Direct16.java =================================================================== --- src/java/org/apache/lucene/util/packed/Direct16.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Direct16.java (revision 0) @@ -0,0 +1,86 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Direct wrapping of 16 bit values to a backing array of shorts. 
+ */ + +class Direct16 extends PackedInts.ReaderImpl + implements PackedInts.Mutable { + private short[] blocks; + private static final int BITS_PER_VALUE = 16; + + public Direct16(int valueCount) { + super(valueCount, BITS_PER_VALUE); + blocks = new short[valueCount]; + } + + public Direct16(IndexInput in, int valueCount) throws IOException { + super(valueCount, BITS_PER_VALUE); + short[] blocks = new short[valueCount]; + for(int i=0;i

+ * Note: The blocks are used directly, so changes to the given block will + * affect the structure. + * @param blocks used as the internal backing array. + */ + public Direct16(short[] blocks) { + super(blocks.length, BITS_PER_VALUE); + this.blocks = blocks; + } + + public long get(final int index) { + return 0xFFFFL & blocks[index]; + } + + public void set(final int index, final long value) { + blocks[index] = (short)(value & 0xFFFF); + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + blocks.length * RamUsageEstimator.NUM_BYTES_SHORT; + } + + public void clear() { + Arrays.fill(blocks, (short)0); + } +} Property changes on: src/java/org/apache/lucene/util/packed/Direct16.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Direct8.java =================================================================== --- src/java/org/apache/lucene/util/packed/Direct8.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Direct8.java (revision 0) @@ -0,0 +1,86 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Direct wrapping of 8 bit values to a backing array of bytes. + */ + +class Direct8 extends PackedInts.ReaderImpl + implements PackedInts.Mutable { + private byte[] blocks; + private static final int BITS_PER_VALUE = 8; + + public Direct8(int valueCount) { + super(valueCount, BITS_PER_VALUE); + blocks = new byte[valueCount]; + } + + public Direct8(IndexInput in, int valueCount) + throws IOException { + super(valueCount, BITS_PER_VALUE); + byte[] blocks = new byte[valueCount]; + for(int i=0;i

+ * Note: The blocks are used directly, so changes to the given block will + * affect the structure. + * @param blocks used as the internal backing array. + */ + public Direct8(byte[] blocks) { + super(blocks.length, BITS_PER_VALUE); + this.blocks = blocks; + } + + public long get(final int index) { + return 0xFFL & blocks[index]; + } + + public void set(final int index, final long value) { + blocks[index] = (byte)(value & 0xFF); + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + blocks.length; + } + + public void clear() { + Arrays.fill(blocks, (byte)0); + } +} Property changes on: src/java/org/apache/lucene/util/packed/Direct8.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/PackedWriter.java =================================================================== --- src/java/org/apache/lucene/util/packed/PackedWriter.java (revision 0) +++ src/java/org/apache/lucene/util/packed/PackedWriter.java (revision 0) @@ -0,0 +1,114 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + import org.apache.lucene.store.IndexOutput; + + import java.io.IOException; + + // Packs high order byte first, to match + // IndexOutput.writeInt/Long/Short byte order + + /** + * Generic writer for space-optimal packed values. The resulting bits can be + * used directly by Packed32, Packed64 and Direct* and will always be + * long-aligned. + */ + + class PackedWriter extends PackedInts.Writer { + private long pending; + private int pendingBitPos; + + // masks[n-1] masks for bottom n bits + private final long[] masks; + private int written = 0; + + public PackedWriter(IndexOutput out, int valueCount, int bitsPerValue) + throws IOException { + super(out, valueCount, bitsPerValue); + + pendingBitPos = 64; + masks = new long[bitsPerValue - 1]; + + long v = 1; // long, not int: an int wraps to 0 after 32 doublings, corrupting masks for bitsPerValue > 32 + for (int i = 0; i < bitsPerValue - 1; i++) { + v *= 2; + masks[i] = v - 1; + } + } + + /** + * Do not call this after finish + */ + @Override + public void add(long v) throws IOException { + assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v + + " maxValue=" + PackedInts.maxValue(bitsPerValue); + assert v >= 0; + //System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos); + + // TODO + if (pendingBitPos >= bitsPerValue) { + // not split + + // write-once, so we can |= w/o first masking to 0s + pending |= v << (pendingBitPos - bitsPerValue); + if (pendingBitPos == bitsPerValue) { + // flush + out.writeLong(pending); + pending = 0; + pendingBitPos = 64; + } else { + pendingBitPos -= bitsPerValue; + } + + } else { + // split + + // write top pendingBitPos bits of value into bottom bits of pending + pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1]; + //System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]); + + // flush + out.writeLong(pending); + + // write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending + pendingBitPos = 64 - bitsPerValue + pendingBitPos; + 
//System.out.println(" part2 v << " + pendingBitPos); + pending = (v << pendingBitPos); + } + written++; + } + + @Override + public void finish() throws IOException { + while (written < valueCount) { + add(0L); // Auto flush + } + + if (pendingBitPos != 64) { + out.writeLong(pending); + } + out.writeLong(0L); // Dummy to compensate for not using conditionals + } + + public String toString() { + return "PackedWriter(written " + written + "/" + valueCount + " with " + + bitsPerValue + " bits/value)"; + } +} Property changes on: src/java/org/apache/lucene/util/packed/PackedWriter.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Direct64.java =================================================================== --- src/java/org/apache/lucene/util/packed/Direct64.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Direct64.java (revision 0) @@ -0,0 +1,79 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + import org.apache.lucene.store.IndexInput; + import org.apache.lucene.util.RamUsageEstimator; + + import java.io.IOException; + import java.util.Arrays; + + /** + * Direct wrapping of 64 bit values to a backing array of longs. + */ + + class Direct64 extends PackedInts.ReaderImpl + implements PackedInts.Mutable { + private long[] blocks; + private static final int BITS_PER_VALUE = 64; + + public Direct64(int valueCount) { + super(valueCount, BITS_PER_VALUE); + blocks = new long[valueCount]; + } + + public Direct64(IndexInput in, int valueCount) throws IOException { + super(valueCount, BITS_PER_VALUE); + long[] blocks = new long[valueCount]; + for(int i=0;i

+ * Note: The blocks are used directly, so changes to the given block will + * affect the structure. + * @param blocks used as the internal backing array. + */ + public Direct64(long[] blocks) { + super(blocks.length, BITS_PER_VALUE); + this.blocks = blocks; + } + + public long get(final int index) { + return blocks[index]; + } + + public void set(final int index, final long value) { + blocks[index] = value; + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + blocks.length * RamUsageEstimator.NUM_BYTES_LONG; + } + + public void clear() { + Arrays.fill(blocks, 0L); + } +} Property changes on: src/java/org/apache/lucene/util/packed/Direct64.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/PackedInts.java =================================================================== --- src/java/org/apache/lucene/util/packed/PackedInts.java (revision 0) +++ src/java/org/apache/lucene/util/packed/PackedInts.java (revision 0) @@ -0,0 +1,266 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.CodecUtil; +import org.apache.lucene.util.Constants; + +import java.io.IOException; + +/** + * Simplistic compression for array of unsigned long values. + * Each value is >= 0 and <= a specified maximum value. The + * values are stored as packed ints, with each value + * consuming a fixed number of bits. + * + * @lucene.internal + */ + +public class PackedInts { + + private final static String CODEC_NAME = "PackedInts"; + private final static int VERSION_START = 0; + private final static int VERSION_CURRENT = VERSION_START; + + /** + * A read-only random access array of positive integers. + * @lucene.internal + */ + public static interface Reader { + /** + * @param index the position of the wanted value. + * @return the value at the stated index. + */ + long get(int index); + + /** + * @return the number of bits used to store any given value. + * Note: This does not imply that memory usage is + * {@code bitsPerValue * #values} as implementations are free to + * use non-space-optimal packing of bits. + */ + int getBitsPerValue(); + + /** + * @return the number of values. + */ + int size(); + } + + /** + * A packed integer array that can be modified. + * @lucene.internal + */ + public static interface Mutable extends Reader { + /** + * Set the value at the given index in the array. + * @param index where the value should be positioned. + * @param value a value conforming to the constraints set by the array. + */ + void set(int index, long value); + + /** + * Sets all values to 0. + */ + + void clear(); + } + + /** + * A simple base for Readers that keeps track of valueCount and bitsPerValue. 
+ * @lucene.internal + */ + public static abstract class ReaderImpl implements Reader { + protected final int bitsPerValue; + protected final int valueCount; + + protected ReaderImpl(int valueCount, int bitsPerValue) { + this.bitsPerValue = bitsPerValue; + assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue; + this.valueCount = valueCount; + } + + public int getBitsPerValue() { + return bitsPerValue; + } + + public int size() { + return valueCount; + } + + public long getMaxValue() { // Convenience method + return maxValue(bitsPerValue); + } + } + + /** A write-once Writer. + * @lucene.internal + */ + public static abstract class Writer { + protected final IndexOutput out; + protected final int bitsPerValue; + protected final int valueCount; + + protected Writer(IndexOutput out, int valueCount, int bitsPerValue) + throws IOException { + assert bitsPerValue <= 64; + + this.out = out; + this.valueCount = valueCount; + this.bitsPerValue = bitsPerValue; + CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT); + out.writeVInt(bitsPerValue); + out.writeVInt(valueCount); + } + + public abstract void add(long v) throws IOException; + public abstract void finish() throws IOException; + } + + /** + * Retrieve PackedInt data from the IndexInput and return a packed int + * structure based on it. + * @param in positioned at the beginning of a stored packed int structure. + * @return a read only random access capable array of positive integers. + * @throws IOException if the structure could not be retrieved. 
+ * @lucene.internal + */ + public static Reader getReader(IndexInput in) throws IOException { + CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START); + final int bitsPerValue = in.readVInt(); + assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue; + final int valueCount = in.readVInt(); + + switch (bitsPerValue) { + case 8: + return new Direct8(in, valueCount); + case 16: + return new Direct16(in, valueCount); + case 32: + return new Direct32(in, valueCount); + case 64: + return new Direct64(in, valueCount); + default: + if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) { + return new Packed64(in, valueCount, bitsPerValue); + } else { + return new Packed32(in, valueCount, bitsPerValue); + } + } + } + + /** + * Create a packed integer array with the given amount of values initialized + * to 0. the valueCount and the bitsPerValue cannot be changed after creation. + * All Mutables known by this factory are kept fully in RAM. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @return a mutable packed integer array. + * @throws java.io.IOException if the Mutable could not be created. With the + * current implementations, this never happens, but the method + * signature allows for future persistence-backed Mutables. + * @lucene.internal + */ + public static Mutable getMutable( + int valueCount, int bitsPerValue) throws IOException { + switch (bitsPerValue) { + case 8: + return new Direct8(valueCount); + case 16: + return new Direct16(valueCount); + case 32: + return new Direct32(valueCount); + case 64: + return new Direct64(valueCount); + default: + if (Constants.JRE_IS_64BIT || bitsPerValue >= 32) { + return new Packed64(valueCount, bitsPerValue); + } else { + return new Packed32(valueCount, bitsPerValue); + } + } + } + + /** + * Create a packed integer array writer for the given number of values at the + * given bits/value. 
Writers append to the given IndexOutput and have very + * low memory overhead. + * @param out the destination for the produced bits. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @return a Writer ready for receiving values. + * @throws IOException if bits could not be written to out. + * @lucene.internal + */ + public static Writer getWriter(IndexOutput out, int valueCount, int bitsPerValue) + throws IOException { + return new PackedWriter(out, valueCount, bitsPerValue); + } + + /** Returns how many bits are required to hold values up + * to and including maxValue + * @param maxValue the maximum value that should be representable. + * @return the number of bits needed to represent values from 0 to maxValue; always at least 1. + * @lucene.internal + */ + public static int bitsRequired(long maxValue) { + // Pure integer arithmetic: the index of the highest set bit is exact for + // every long. The previous Math.log based computation went through double, + // where 1+maxValue is rounded above 2^53 and the quotient of two + // logarithms is not guaranteed to be exact, and it returned 0 for + // maxValue == 0, which no packed structure accepts as bitsPerValue. + // Negative input is outside the contract (values are unsigned); it has + // the sign bit set and therefore yields 64. + return Math.max(1, 64 - Long.numberOfLeadingZeros(maxValue)); + } + + /** + * Calculates the maximum unsigned long that can be expressed with the given + * number of bits. + * @param bitsPerValue the number of bits available for any given value. + * @return the maximum value for the given bits. + * @lucene.internal + */ + public static long maxValue(int bitsPerValue) { + return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue); + } + + /** Rounds bitsPerValue up to 8, 16, 32 or 64. 
*/ + public static int getNextFixedSize(int bitsPerValue) { + if (bitsPerValue <= 8) { + return 8; + } else if (bitsPerValue <= 16) { + return 16; + } else if (bitsPerValue <= 32) { + return 32; + } else { + return 64; + } + } + + /** Possibly wastes some storage in exchange for faster lookups */ + public static int getRoundedFixedSize(int bitsPerValue) { + if (bitsPerValue > 58 || (bitsPerValue < 32 && bitsPerValue > 29)) { // 10% space-waste is ok + return getNextFixedSize(bitsPerValue); + } else { + return bitsPerValue; + } + } +} Property changes on: src/java/org/apache/lucene/util/packed/PackedInts.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/Packed32.java =================================================================== --- src/java/org/apache/lucene/util/packed/Packed32.java (revision 0) +++ src/java/org/apache/lucene/util/packed/Packed32.java (revision 0) @@ -0,0 +1,220 @@ +package org.apache.lucene.util.packed; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Arrays; + +/** + * Space optimized random access capable array of values with a fixed number of + * bits. The maximum number of bits/value is 31. Use {@link Packed64} for higher + * numbers. + *

+ * The implementation strives to avoid conditionals and expensive operations, + * sacrificing code clarity to achieve better performance. + */ + +class Packed32 extends PackedInts.ReaderImpl implements PackedInts.Mutable { + static final int BLOCK_SIZE = 32; // 32 = int, 64 = long + static final int BLOCK_BITS = 5; // The #bits representing BLOCK_SIZE + static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE + + private static final int ENTRY_SIZE = BLOCK_SIZE + 1; + private static final int FAC_BITPOS = 3; + + /* + * In order to make an efficient value-getter, conditionals should be + * avoided. A value can be positioned inside of a block, requiring shifting + * left or right or it can span two blocks, requiring a left-shift on the + * first block and a right-shift on the right block. + *

+ * By always shifting the first block both left and right, we get exactly + * the right bits. By always shifting the second block right and applying + * a mask, we get the right bits there. After that, we | the two bitsets. + */ + private static final int[][] SHIFTS = + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; + private static final int[][] MASKS = new int[ENTRY_SIZE][ENTRY_SIZE]; + + static { // Generate shifts + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { + int[] currentShifts = SHIFTS[elementBits]; + int base = bitPos * FAC_BITPOS; + currentShifts[base ] = bitPos; + currentShifts[base + 1] = BLOCK_SIZE - elementBits; + if (bitPos <= BLOCK_SIZE - elementBits) { // Single block + currentShifts[base + 2] = 0; + MASKS[elementBits][bitPos] = 0; + } else { // Two blocks + int rBits = elementBits - (BLOCK_SIZE - bitPos); + currentShifts[base + 2] = BLOCK_SIZE - rBits; + MASKS[elementBits][bitPos] = ~(~0 << rBits); + } + } + } + } + + /* + * The setter requires more masking than the getter. + */ + private static final int[][] WRITE_MASKS = + new int[ENTRY_SIZE][ENTRY_SIZE * FAC_BITPOS]; + static { + for (int elementBits = 1 ; elementBits <= BLOCK_SIZE ; elementBits++) { + int elementPosMask = ~(~0 << elementBits); + int[] currentShifts = SHIFTS[elementBits]; + int[] currentMasks = WRITE_MASKS[elementBits]; + for (int bitPos = 0 ; bitPos < BLOCK_SIZE ; bitPos++) { + int base = bitPos * FAC_BITPOS; + currentMasks[base ] =~((elementPosMask + << currentShifts[base + 1]) + >>> currentShifts[base]); + currentMasks[base+1] = ~(elementPosMask + << currentShifts[base + 2]); + currentMasks[base+2] = currentShifts[base + 2] == 0 ? 
0 : ~0; + } + } + } + + /* The bits */ + private int[] blocks; + + // Cached calculations + private int maxPos; // blocks.length * BLOCK_SIZE / bitsPerValue - 2 + private int[] shifts; // The shifts for the current bitsPerValue + private int[] readMasks; + private int[] writeMasks; + + /** + * Creates an array with the internal structures adjusted for the given + * limits and initialized to 0. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * Note: bitsPerValue >31 is not supported by this implementation. + */ + public Packed32(int valueCount, int bitsPerValue) { + this(new int[(int)(((long)valueCount) * bitsPerValue / BLOCK_SIZE + 2)], + valueCount, bitsPerValue); + } + + /** + * Creates an array with content retrieved from the given IndexInput. + * @param in an IndexInput, positioned at the start of Packed32-content. + * @param valueCount the number of elements. + * @param bitsPerValue the number of bits available for any given value. + * @throws java.io.IOException if the values for the backing array could not + * be retrieved. + */ + public Packed32(IndexInput in, int valueCount, int bitsPerValue) + throws IOException { + super(valueCount, bitsPerValue); + int size = size(bitsPerValue, valueCount); + blocks = new int[size + 1]; // +1 due to non-conditional tricks + for(int i = 0 ; i < size ; i++) { + blocks[i] = in.readInt(); + } + if (size % 2 == 1) { + in.readInt(); // Align to long + } + updateCached(); + } + + private static int size(int bitsPerValue, int valueCount) { + final long totBitCount = (long) valueCount * bitsPerValue; + return (int) (totBitCount/32 + ((totBitCount % 32 == 0 ) ? 0:1)); + } + + + /** + * Creates an array backed by the given blocks. + *

+ * Note: The blocks are used directly, so changes to the given block will + * affect the Packed32-structure. + * @param blocks used as the internal backing array. + * @param valueCount the number of values. + * @param bitsPerValue the number of bits available for any given value. + * Note: bitsPerValue >31 is not supported by this implementation. + */ + public Packed32(int[] blocks, int valueCount, int bitsPerValue) { + // TODO: Check that blocks.length is sufficient for holding length values + super(valueCount, bitsPerValue); + if (bitsPerValue > 31) { + throw new IllegalArgumentException(String.format( + "This array only supports values of 31 bits or less. The " + + "required number of bits was %d. The Packed64 " + + "implementation allows values with more than 31 bits", + bitsPerValue)); + } + this.blocks = blocks; + updateCached(); + } + + private void updateCached() { + readMasks = MASKS[bitsPerValue]; + maxPos = (int)((((long)blocks.length) * BLOCK_SIZE / bitsPerValue) - 2); + shifts = SHIFTS[bitsPerValue]; + writeMasks = WRITE_MASKS[bitsPerValue]; + } + + /** + * @param index the position of the value. + * @return the value at the given index. 
+ */ + public long get(final int index) { + final long majorBitPos = (long)index * bitsPerValue; // widen before multiplying: the int product overflows for large indexes + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); + + final int base = bitPos * FAC_BITPOS; + + return ((blocks[elementPos] << shifts[base]) >>> shifts[base+1]) | + ((blocks[elementPos+1] >>> shifts[base+2]) & readMasks[bitPos]); + } + + public void set(final int index, final long value) { + final int intValue = (int)value; + final long majorBitPos = (long)index * bitsPerValue; // widen before multiplying, as in get + final int elementPos = (int)(majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE + final int bitPos = (int)(majorBitPos & MOD_MASK); // % BLOCK_SIZE); + final int base = bitPos * FAC_BITPOS; + + blocks[elementPos ] = (blocks[elementPos ] & writeMasks[base]) + | (intValue << shifts[base + 1] >>> shifts[base]); + blocks[elementPos+1] = (blocks[elementPos+1] & writeMasks[base+1]) + | ((intValue << shifts[base + 2]) + & writeMasks[base+2]); + } + + public void clear() { + Arrays.fill(blocks, 0); + } + + public String toString() { + return "Packed32(bitsPerValue=" + bitsPerValue + ", maxPos=" + maxPos + + ", elements.length=" + blocks.length + ")"; + } + + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + + blocks.length * RamUsageEstimator.NUM_BYTES_INT; + } +} Property changes on: src/java/org/apache/lucene/util/packed/Packed32.java ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/packed/package.html =================================================================== --- src/java/org/apache/lucene/util/packed/package.html (revision 0) +++ src/java/org/apache/lucene/util/packed/package.html (revision 0) @@ -0,0 +1,16 @@ + + + + + +

+ The packed package provides random access capable arrays of positive longs. + The implementations provide different trade-offs between memory usage and + access speed. The standard usage scenario is replacing large int or long + arrays in order to reduce the memory footprint. +

+ The main access point is the {@link PackedInts} factory. +

+ + + Property changes on: src/java/org/apache/lucene/util/packed/package.html ___________________________________________________________________ Added: svn:eol-style + native Index: src/java/org/apache/lucene/util/RamUsageEstimator.java =================================================================== --- src/java/org/apache/lucene/util/RamUsageEstimator.java (revision 921005) +++ src/java/org/apache/lucene/util/RamUsageEstimator.java (working copy) @@ -37,6 +37,16 @@ * @lucene.internal */ public final class RamUsageEstimator { + + public final static int NUM_BYTES_SHORT = 2; + public final static int NUM_BYTES_INT = 4; + public final static int NUM_BYTES_LONG = 8; + public final static int NUM_BYTES_FLOAT = 4; + public final static int NUM_BYTES_DOUBLE = 8; + public final static int NUM_BYTES_OBJ_HEADER = 8; + public final static int NUM_BYTES_OBJ_REF = Constants.JRE_IS_64BIT ? 8 : 4; + public final static int NUM_BYTES_ARRAY_HEADER = NUM_BYTES_OBJ_HEADER + NUM_BYTES_INT + NUM_BYTES_OBJ_REF; + private MemoryModel memoryModel; private final Map seen; @@ -47,11 +57,6 @@ public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4; public final static int NUM_BYTES_CHAR = 2; - public final static int NUM_BYTES_SHORT = 2; - public final static int NUM_BYTES_INT = 4; - public final static int NUM_BYTES_LONG = 8; - public final static int NUM_BYTES_FLOAT = 4; - public final static int NUM_BYTES_DOUBLE = 8; private boolean checkInterned;