Index: src/java/org/apache/lucene/index/codecs/gvint/GVIntCodec.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/gvint/GVIntCodec.java (revision 0)
+++ src/java/org/apache/lucene/index/codecs/gvint/GVIntCodec.java (revision 0)
@@ -0,0 +1,178 @@
+package org.apache.lucene.index.codecs.gvint;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
+import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
+import org.apache.lucene.index.codecs.PostingsReaderBase;
+import org.apache.lucene.index.codecs.PostingsWriterBase;
+import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.TermsIndexReaderBase;
+import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
+import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
+import org.apache.lucene.index.codecs.standard.StandardCodec;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Codec named "GVInt": sep postings written and read through
+ * {@link GVintFactory} int streams, combined with the prefix-coded terms
+ * dictionary and the fixed-gap terms index.
+ */
+public final class GVIntCodec extends Codec {
+
+  /** Sets the codec name to "GVInt". */
+  public GVIntCodec() {
+    name = "GVInt";
+  }
+
+  /** Returns the codec name. */
+  @Override
+  public String toString() {
+    return name;
+  }
+
+  /** Returns a new {@link GVintFactory}; only for testing. */
+  IntStreamFactory getIntFactory() {
+    return new GVintFactory();
+  }
+
+  /**
+   * Builds the write chain: a sep postings writer over {@link GVintFactory},
+   * a fixed-gap terms index writer, and the prefix-coded terms writer on top.
+   * If a later step throws, the components created so far are closed before
+   * the exception propagates.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new GVintFactory());
+
+    boolean success = false;
+    TermsIndexWriterBase indexWriter;
+    try {
+      indexWriter = new FixedGapTermsIndexWriter(state);
+      success = true;
+    } finally {
+      if (!success) {
+        postingsWriter.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter,
+          BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          postingsWriter.close();
+        } finally {
+          indexWriter.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the read chain: a sep postings reader over {@link GVintFactory},
+   * a fixed-gap terms index reader, and the prefix-coded terms reader on top.
+   * If a later step throws, the components opened so far are closed before
+   * the exception propagates.
+   */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir,
+        state.segmentInfo,
+        state.readBufferSize,
+        new GVintFactory());
+
+    TermsIndexReaderBase indexReader;
+    boolean success = false;
+    try {
+      indexReader = new FixedGapTermsIndexReader(state.dir,
+          state.fieldInfos,
+          state.segmentInfo.name,
+          state.termsIndexDivisor,
+          BytesRef.getUTF8SortedAsUnicodeComparator());
+      success = true;
+    } finally {
+      if (!success) {
+        postingsReader.close();
+      }
+    }
+
+    success = false;
+    try {
+      FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
+          state.dir,
+          state.fieldInfos,
+          state.segmentInfo.name,
+          postingsReader,
+          state.readBufferSize,
+          BytesRef.getUTF8SortedAsUnicodeComparator(),
+          StandardCodec.TERMS_CACHE_SIZE);
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        try {
+          postingsReader.close();
+        } finally {
+          indexReader.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Passes {@code files} to the static {@code files(...)} helpers of the sep
+   * postings reader, the terms reader and the terms index reader.
+   */
+  @Override
+  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) {
+    SepPostingsReaderImpl.files(segmentInfo, files);
+    PrefixCodedTermsReader.files(dir, segmentInfo, files);
+    FixedGapTermsIndexReader.files(dir, segmentInfo, files);
+  }
+
+  /**
+   * Passes {@code extensions} to the extension helpers of the sep postings
+   * writer, the terms reader and the terms index reader.
+   */
+  @Override
+  public void getExtensions(Set<String> extensions) {
+    SepPostingsWriterImpl.getExtensions(extensions);
+    PrefixCodedTermsReader.getExtensions(extensions);
+    FixedGapTermsIndexReader.getIndexExtensions(extensions);
+  }
+}
+

Property changes on: src/java/org/apache/lucene/index/codecs/gvint/GVIntCodec.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/codecs/gvint/GVintFactory.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/gvint/GVintFactory.java (revision 0)
+++ src/java/org/apache/lucene/index/codecs/gvint/GVintFactory.java (revision 0)
@@ -0,0 +1,48 @@
+package org.apache.lucene.index.codecs.gvint;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.store.Directory;
+
+/**
+ * {@link IntStreamFactory} for Group VarInt: opens {@link GVintIndexInput}
+ * streams for reading and creates {@link GVintIndexOutput} streams for writing.
+ */
+public final class GVintFactory extends IntStreamFactory {
+
+  /**
+   * Opens {@code fileName} in {@code dir} as a {@link GVintIndexInput}, using
+   * the read buffer size requested by the caller.
+   */
+  @Override
+  public IntIndexInput openInput(Directory dir, String fileName,
+      int readBufferSize) throws IOException {
+    return new GVintIndexInput(dir, fileName, readBufferSize);
+  }
+
+  /** Creates {@code fileName} in {@code dir} as a {@link GVintIndexOutput}. */
+  @Override
+  public IntIndexOutput createOutput(Directory dir, String fileName)
+      throws IOException {
+    return new GVintIndexOutput(dir, fileName);
+  }
+
+}

Property changes on: src/java/org/apache/lucene/index/codecs/gvint/GVintFactory.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/codecs/gvint/GVintIndexInput.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/gvint/GVintIndexInput.java (revision 0)
+++ src/java/org/apache/lucene/index/codecs/gvint/GVintIndexInput.java (revision 0)
@@ -0,0 +1,92 @@
+package org.apache.lucene.index.codecs.gvint;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
+
+/**
+ * Reads IndexInputs written with {@link GVintIndexOutput}.
+ *
+ * @lucene.experimental
+ */
+public final class GVintIndexInput extends FixedIntBlockIndexInput {
+
+  /**
+   * Opens {@code fileName} in {@code dir} and runs the codec header check on it.
+   *
+   * @param dir directory containing the file
+   * @param fileName name of the file to open
+   * @param readBufferSize buffer size passed to {@code dir.openInput}
+   * @throws IOException if opening the file or the header check throws it
+   */
+  public GVintIndexInput(Directory dir, String fileName, int readBufferSize)
+      throws IOException {
+    super(createInput(dir, fileName, readBufferSize));
+  }
+
+  /**
+   * Opens the raw input and calls {@code CodecUtil.checkHeader} with
+   * {@link GVintIndexOutput#CODEC} and {@link GVintIndexOutput#VERSION_START}
+   * as both version bounds. The input is closed again if the check throws, so
+   * a rejected file does not leak an open handle.
+   */
+  private static final IndexInput createInput(Directory dir, String fileName,
+      int readBufferSize) throws IOException {
+    final IndexInput input = dir.openInput(fileName, readBufferSize);
+    boolean success = false;
+    try {
+      CodecUtil.checkHeader(input, GVintIndexOutput.CODEC,
+          GVintIndexOutput.VERSION_START, GVintIndexOutput.VERSION_START);
+      success = true;
+      return input;
+    } finally {
+      if (!success) {
+        input.close();
+      }
+    }
+  }
+
+  /** Returns a reader that decodes one group of four ints into {@code buffer} per block. */
+  @Override
+  protected BlockReader getBlockReader(final IndexInput in, final int[] buffer)
+      throws IOException {
+    return new GVintBlockReader(in, buffer);
+  }
+
+  /**
+   * Decodes blocks of four ints. Static because it only touches its own
+   * fields and the static {@link #readGroupInt(int, IndexInput)} helper.
+   */
+  private static final class GVintBlockReader implements BlockReader {
+    private final IndexInput in;
+    private final int[] buffer;
+
+    private GVintBlockReader(IndexInput in, int[] buffer) {
+      this.in = in;
+      this.buffer = buffer;
+    }
+
+    /** No-op: this reader keeps no position state of its own. */
+    public void seek(long pos) {
+    }
+
+    /**
+     * Reads one lengths byte followed by four values. Each 2-bit field of the
+     * lengths byte, lowest bits first, selects how many bytes the matching
+     * value occupies (field value + 1).
+     */
+    public void readBlock() throws IOException {
+      final int lengths = 0xFF & in.readByte();
+      buffer[0] = readGroupInt((lengths & 0x03), in);
+      buffer[1] = readGroupInt(((lengths>>2) & 0x03), in);
+      buffer[2] = readGroupInt(((lengths>>4) & 0x03), in);
+      buffer[3] = readGroupInt(((lengths>>6)), in);
+    }
+  }
+
+  /**
+   * Reads a single value of {@code len + 1} bytes, least significant byte first.
+   *
+   * @param len 0, 1 or 2 for one, two or three bytes; any other value reads four
+   * @param input source to read the bytes from
+   * @return the decoded int
+   */
+  private static int readGroupInt(final int len, final IndexInput input) throws IOException {
+    switch (len) {
+    case 0:
+      return (input.readByte() & 0xFF);
+    case 1:
+      return ((input.readByte() & 0xFF)) | ((input.readByte() & 0xFF) << 8);
+    case 2:
+      return ((input.readByte() & 0xFF)) | ((input.readByte() & 0xFF) << 8)
+          | ((input.readByte() & 0xFF) << 16);
+    default:
+      // the top byte needs no mask: shifting by 24 discards the sign extension
+      return ((input.readByte() & 0xFF)) | ((input.readByte() & 0xFF) << 8)
+          | ((input.readByte() & 0xFF) << 16)
+          | ((input.readByte()) << 24);
+    }
+  }
+}

Property changes on: src/java/org/apache/lucene/index/codecs/gvint/GVintIndexInput.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/java/org/apache/lucene/index/codecs/gvint/GVintIndexOutput.java
===================================================================
--- src/java/org/apache/lucene/index/codecs/gvint/GVintIndexOutput.java (revision 0)
+++ src/java/org/apache/lucene/index/codecs/gvint/GVintIndexOutput.java (revision 0)
@@ -0,0 +1,79 @@
+package org.apache.lucene.index.codecs.gvint;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.CodecUtil;
+
+/**
+ * Writes ints as Group VarInt: every block of four ints is stored as one
+ * lengths byte followed by the four values, each taking 1 to 4 bytes with the
+ * least significant byte first. Bits {@code 2*i} and {@code 2*i+1} of the
+ * lengths byte hold the byte count minus one of value {@code i}.
+ *
+ * @lucene.experimental
+ */
+public final class GVintIndexOutput extends FixedIntBlockIndexOutput {
+  final static String CODEC = "GVInt";
+  final static int VERSION_START = 0;
+  final static int VERSION_CURRENT = VERSION_START;
+  // one lengths byte plus at most 4 values * 4 bytes
+  private final byte[] byteBuffer = new byte[17];
+
+  /**
+   * Creates {@code fileName} in {@code dir}, writes the codec header and
+   * configures the superclass with a block size of 4.
+   */
+  public GVintIndexOutput(Directory dir, String fileName) throws IOException {
+    super(createOutput(dir, fileName), 4); // use block size of 4 for now
+  }
+
+  /**
+   * Creates the raw output and writes the {@link #CODEC} header at
+   * {@link #VERSION_CURRENT}. The output is closed again if writing the header
+   * throws, so a failed create does not leak an open handle.
+   */
+  private static IndexOutput createOutput(Directory dir, String fileName)
+      throws IOException {
+    final IndexOutput output = dir.createOutput(fileName);
+    boolean success = false;
+    try {
+      CodecUtil.writeHeader(output, CODEC, VERSION_CURRENT);
+      success = true;
+      return output;
+    } finally {
+      if (!success) {
+        output.close();
+      }
+    }
+  }
+
+  /**
+   * Encodes the four buffered ints into {@code byteBuffer} and writes them out.
+   * Each value uses the fewest bytes that hold it as an unsigned 32-bit
+   * quantity: 1 byte below 2^8, 2 bytes below 2^16, 3 bytes below 2^24, and
+   * 4 bytes otherwise. Negative values have their top bits set and therefore
+   * always take 4 bytes. The width tests use {@code >>>} so the sign bit is
+   * treated as data rather than compared as a signed number.
+   */
+  @Override
+  protected void flushBlock() throws IOException {
+    assert buffer != null && buffer.length == 4;
+    int lengths = 0;
+    int bufferPos = 1; // slot 0 is reserved for the lengths byte
+    for (int i = 0; i < 4; i++) {
+      final int current = buffer[i];
+      if ((current >>> 8) == 0) {
+        // length code 0: nothing to OR into lengths
+        byteBuffer[bufferPos++] = (byte) (current);
+      } else if ((current >>> 16) == 0) {
+        byteBuffer[bufferPos++] = (byte) (current);
+        byteBuffer[bufferPos++] = (byte) (current >>> 8);
+        lengths |= (1 << (i<<1));
+      } else if ((current >>> 24) == 0) {
+        byteBuffer[bufferPos++] = (byte) (current);
+        byteBuffer[bufferPos++] = (byte) (current >>> 8);
+        byteBuffer[bufferPos++] = (byte) (current >>> 16);
+        lengths |= (2 << (i<<1));
+      } else {
+        byteBuffer[bufferPos++] = (byte) (current);
+        byteBuffer[bufferPos++] = (byte) (current >>> 8);
+        byteBuffer[bufferPos++] = (byte) (current >>> 16);
+        byteBuffer[bufferPos++] = (byte) (current >>> 24);
+        lengths |= (3 << (i<<1));
+      }
+    }
+    byteBuffer[0] = (byte)lengths;
+    out.writeBytes(byteBuffer, bufferPos);
+  }
+}

Property changes on: src/java/org/apache/lucene/index/codecs/gvint/GVintIndexOutput.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/test/org/apache/lucene/index/codecs/gvint/GVintMicroBenchmark.java
===================================================================
--- src/test/org/apache/lucene/index/codecs/gvint/GVintMicroBenchmark.java (revision 0)
+++ src/test/org/apache/lucene/index/codecs/gvint/GVintMicroBenchmark.java (revision 0)
@@ -0,0 +1,166 @@
+package org.apache.lucene.index.codecs.gvint;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput;
+import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.index.codecs.sep.IntIndexOutput;
+import org.apache.lucene.index.codecs.sep.IntStreamFactory;
+import org.apache.lucene.index.codecs.sep.IntIndexInput.Reader;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NIOFSDirectory;
+
+/**
+ * Micro benchmark comparing the read time of {@link GVintFactory} streams with
+ * a block stream that stores every value as a VInt. For each maximum value
+ * from 2^4 to 2^30 it fills one million groups of four random ints and prints
+ * one table row with the average over 10 measured rounds.
+ */
+public class GVintMicroBenchmark {
+
+  /**
+   * Runs the benchmark and prints a pipe-separated table to standard out.
+   *
+   * @param args ignored
+   * @throws IOException if writing or reading the benchmark file fails
+   */
+  public static void main(String[] args) throws IOException {
+    Random r = new Random();
+    int[][] intsToEncode = new int[1000000][];
+    System.out.println("|Max random value|GVint ns / value|Vint ns/value|GVint total in ms|Vint total in ms|");
+    for (int maxShift = 4; maxShift <= 30; maxShift += 1) {
+      int max = 1 << maxShift;
+      for (int i = 0; i < intsToEncode.length; i++) {
+        // the group arrays are allocated once and refilled for every maxShift
+        if (intsToEncode[i] == null) {
+          intsToEncode[i] = new int[4];
+        }
+        intsToEncode[i][0] = r.nextInt(max);
+        intsToEncode[i][1] = r.nextInt(max);
+        intsToEncode[i][2] = r.nextInt(max);
+        intsToEncode[i][3] = r.nextInt(max);
+      }
+      long gvi = 0;
+      long vints = 0;
+      IntStreamFactory gvint = new GVintFactory();
+      IntStreamFactory vint = new VIntFactory();
+      // warmup
+      for (int i = 0; i < 3; i++) {
+        benchRead(intsToEncode, gvint);
+        benchRead(intsToEncode, vint);
+      }
+      // alternate which codec runs first so neither always benefits from ordering
+      for (int i = 0; i < 10; i++) {
+        if (i % 2 == 0) {
+          gvi += benchRead(intsToEncode, gvint);
+          vints += benchRead(intsToEncode, vint);
+        } else {
+          vints += benchRead(intsToEncode, vint);
+          gvi += benchRead(intsToEncode, gvint);
+        }
+      }
+      System.out.println(String.format("|%d|%d|%d|%d|%d|", max-1, (gvi / 10)/(intsToEncode.length * 4) , (vints / 10) / (intsToEncode.length * 4) ,
+          TimeUnit.MILLISECONDS.convert(gvi / 10, TimeUnit.NANOSECONDS),
+          TimeUnit.MILLISECONDS.convert(vints / 10, TimeUnit.NANOSECONDS)));
+    }
+  }
+
+  // run with -server -Xmx512M -Xms512M
+
+  /**
+   * Writes all values through {@code factory}, reads them back and returns the
+   * time spent in the read loop. Only the loop over {@code reader.next()} is
+   * timed; closing the input, deleting the file and closing the directory
+   * happen after the clock has stopped.
+   *
+   * @param intsToEncode groups of values to write and read back
+   * @param factory stream factory under test
+   * @return elapsed read time in nanoseconds
+   * @throws IOException if the benchmark directory cannot be created or I/O fails
+   */
+  private static long benchRead(int[][] intsToEncode, IntStreamFactory factory)
+      throws IOException {
+    File f = new File("/tmp/gvi");
+    if (!f.exists() && !f.mkdirs()) {
+      throw new IOException("could not create benchmark directory " + f);
+    }
+    String fileName = "ints.dat";
+    final Directory dir = new NIOFSDirectory(f);
+    try {
+      IntIndexOutput out = factory.createOutput(dir, fileName);
+      try {
+        for (int[] is : intsToEncode) {
+          for (int i = 0; i < is.length; i++) {
+            out.write(is[i]);
+          }
+        }
+      } finally {
+        out.close();
+      }
+
+      final IntIndexInput input = factory.openInput(dir, fileName);
+      final long elapsed;
+      try {
+        final Reader reader = input.reader();
+        final long start = System.nanoTime();
+        for (int i = 0; i < intsToEncode.length; i++) {
+          for (int j = 0; j < intsToEncode[i].length; j++) {
+            int next = reader.next();
+            assert next == intsToEncode[i][j]: ""+factory.getClass().getName() + " "+next+ " != " + intsToEncode[i][j];
+          }
+        }
+        // stop the clock before any cleanup so file-system work is not measured
+        elapsed = System.nanoTime() - start;
+      } finally {
+        input.close();
+      }
+      dir.deleteFile(fileName);
+      return elapsed;
+    } finally {
+      dir.close();
+    }
+  }
+
+  /**
+   * Baseline factory: fixed int blocks whose values are written with
+   * {@code writeVInt} and read back with {@code readVInt}.
+   */
+  static final class VIntFactory extends IntStreamFactory {
+
+    /** Opens a block input whose block reader fills the buffer with {@code readVInt} calls. */
+    @Override
+    public IntIndexInput openInput(Directory dir, String fileName,
+        int readBufferSize) throws IOException {
+      return new FixedIntBlockIndexInput(dir
+          .openInput(fileName, readBufferSize)) {
+
+        @Override
+        protected BlockReader getBlockReader(final IndexInput in,
+            final int[] buffer) throws IOException {
+          return new BlockReader() {
+            public void seek(long pos) {
+            }
+
+            public void readBlock() throws IOException {
+              for (int i = 0; i < buffer.length; i++) {
+                buffer[i] = in.readVInt();
+              }
+            }
+          };
+        }
+      };
+    }
+
+    /** Creates a block output with block size 1024 that flushes each value with {@code writeVInt}. */
+    @Override
+    public IntIndexOutput createOutput(Directory dir, String fileName)
+        throws IOException {
+      return new FixedIntBlockIndexOutput(dir.createOutput(fileName), 1024) {
+        @Override
+        protected void flushBlock() throws IOException {
+          for (int i = 0; i < buffer.length; i++) {
+            out.writeVInt(buffer[i]);
+          }
+        }
+      };
+    }
+  }
+
+}

Property changes on: src/test/org/apache/lucene/index/codecs/gvint/GVintMicroBenchmark.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL

Index: src/test/org/apache/lucene/index/codecs/gvint/TestGVIntIndexInputOutput.java
===================================================================
--- src/test/org/apache/lucene/index/codecs/gvint/TestGVIntIndexInputOutput.java (revision 0)
+++ src/test/org/apache/lucene/index/codecs/gvint/TestGVIntIndexInputOutput.java (revision 0)
@@ -0,0 +1,65 @@
+package org.apache.lucene.index.codecs.gvint;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Simple testcase for {@link GVintIndexInput} & {@link GVintIndexOutput} + */ +public class TestGVIntIndexInputOutput extends LuceneTestCase { + private GVintFactory f = new GVintFactory(); + + public void testSimpleIntBlocks() throws Exception { + Directory dir = newDirectory(); + IntIndexOutput out = f.createOutput(dir, "test"); + int[] values = new int[11777]; + for(int i=0;i