Index: lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java (revision 0) +++ lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java (revision 0) @@ -0,0 +1,226 @@ +package org.apache.lucene.index.codecs.bulkvint; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Set; + +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.index.codecs.FieldsConsumer; +import org.apache.lucene.index.codecs.FieldsProducer; +import org.apache.lucene.index.codecs.sep.IntStreamFactory; +import org.apache.lucene.index.codecs.sep.IntIndexInput; +import org.apache.lucene.index.codecs.sep.IntIndexOutput; +import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl; +import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexInput; +import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput; +import org.apache.lucene.index.codecs.FixedGapTermsIndexReader; +import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter; +import org.apache.lucene.index.codecs.PostingsWriterBase; +import org.apache.lucene.index.codecs.PostingsReaderBase; +import org.apache.lucene.index.codecs.PrefixCodedTermsReader; +import org.apache.lucene.index.codecs.PrefixCodedTermsWriter; +import org.apache.lucene.index.codecs.TermsIndexReaderBase; +import org.apache.lucene.index.codecs.TermsIndexWriterBase; +import org.apache.lucene.index.codecs.standard.StandardCodec; +import org.apache.lucene.store.*; +import org.apache.lucene.util.BytesRef; + +/** + * Silly codec that acts like MockFixedIntBlockCodec mostly (uses vint encoding); for each block it: + * writes a single vint header (the length in bytes of the vint-encoded block), then + * writes the block as a list of vints. Postings go through SepPostingsWriterImpl/SepPostingsReaderImpl and terms through PrefixCodedTermsWriter/PrefixCodedTermsReader with a FixedGapTermsIndexWriter/FixedGapTermsIndexReader terms index. + */ + +public class BulkVIntCodec extends Codec { + /** Block size given to the constructor; the visible code uses it to size the per-block byte buffer (blockSize times 5) and in toString(). */ + private final int blockSize; + /** Creates the codec, storing blockSize and setting the codec name to "BulkVInt". */ + public BulkVIntCodec(int blockSize) { + this.blockSize = blockSize; + name = "BulkVInt"; + } + /** Returns the codec name followed by the configured block size. */ + @Override + public String toString() { + return name + "(blockSize=" + blockSize + ")"; + } + + // only for testing + public IntStreamFactory 
getIntFactory() { + return new BulkVIntFactory(); + } + /** IntStreamFactory for this codec; openInput wraps the file in a FixedIntBlockIndexInput whose BlockReader reads a vint byte count and then that many bytes for each block. */ + private class BulkVIntFactory extends IntStreamFactory { + + @Override + public IntIndexInput openInput(Directory dir, String fileName, int readBufferSize) throws IOException { + return new FixedIntBlockIndexInput(dir.openInput(fileName, readBufferSize)) { + + @Override + protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException { + return new BlockReader() { + final byte bytes[] = new byte[blockSize*5]; // header * max(Vint) + + public void readBlock() throws IOException { + final int numBytes = in.readVInt(); // read header + in.readBytes(bytes, 0, numBytes); // readBytes + + int upto = 0; + /* NOTE(review): in this copy of the patch the text between "for(int i=0;i" and the following shift-by-7 statement appears to be missing (the rest of the decode loop and the start of the writer side); restore it from the original patch before applying. */ + // decode bytes + for(int i=0;i>>= 7; + } + bytes[upto++] = (byte)j; + } + + // write header (length in bytes) + out.writeVInt(upto); + + // write block + out.writeBytes(bytes, 0, upto); + } + }; + } + } + /** Builds the write-side chain: a SepPostingsWriterImpl using BulkVIntFactory, a FixedGapTermsIndexWriter and a PrefixCodedTermsWriter; if a later constructor throws, the components already created are closed. */ + @Override + public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { + PostingsWriterBase postingsWriter = new SepPostingsWriterImpl(state, new BulkVIntFactory()); + + boolean success = false; + TermsIndexWriterBase indexWriter; + try { + indexWriter = new FixedGapTermsIndexWriter(state); + success = true; + } finally { + if (!success) { + postingsWriter.close(); + } + } + + success = false; + try { + FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator()); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsWriter.close(); + } finally { + indexWriter.close(); + } + } + } + } + /** Builds the read-side chain: a SepPostingsReaderImpl using BulkVIntFactory, a FixedGapTermsIndexReader and a PrefixCodedTermsReader; if a later constructor throws, the components already created are closed. */ + @Override + public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { + PostingsReaderBase postingsReader = new SepPostingsReaderImpl(state.dir, + state.segmentInfo, + state.readBufferSize, + new BulkVIntFactory(), state.codecId); + + TermsIndexReaderBase indexReader; + boolean success = false; + try { + indexReader = new 
FixedGapTermsIndexReader(state.dir, + state.fieldInfos, + state.segmentInfo.name, + state.termsIndexDivisor, + BytesRef.getUTF8SortedAsUnicodeComparator(), state.codecId); + success = true; + } finally { + if (!success) { + postingsReader.close(); + } + } + + success = false; + try { + FieldsProducer ret = new PrefixCodedTermsReader(indexReader, + state.dir, + state.fieldInfos, + state.segmentInfo.name, + postingsReader, + state.readBufferSize, + BytesRef.getUTF8SortedAsUnicodeComparator(), + StandardCodec.TERMS_CACHE_SIZE, + state.codecId); + success = true; + return ret; + } finally { + if (!success) { + try { + postingsReader.close(); + } finally { + indexReader.close(); + } + } + } + } + /** Delegates to the static files(...) helpers of SepPostingsReaderImpl, PrefixCodedTermsReader and FixedGapTermsIndexReader, passing each the given set. */ + @Override + public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set files) { + SepPostingsReaderImpl.files(segmentInfo, codecId, files); + PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files); + FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files); + } + /** Delegates to SepPostingsWriterImpl.getExtensions, PrefixCodedTermsReader.getExtensions and FixedGapTermsIndexReader.getIndexExtensions, passing each the given set. */ + @Override + public void getExtensions(Set extensions) { + SepPostingsWriterImpl.getExtensions(extensions); + PrefixCodedTermsReader.getExtensions(extensions); + FixedGapTermsIndexReader.getIndexExtensions(extensions); + } +} Property changes on: lucene\src\java\org\apache\lucene\index\codecs\bulkvint\BulkVIntCodec.java ___________________________________________________________________ Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (revision 1051210) +++ lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (working copy) @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Set; +import org.apache.lucene.index.codecs.bulkvint.BulkVIntCodec; import org.apache.lucene.index.codecs.preflex.PreFlexCodec; import org.apache.lucene.index.codecs.pulsing.PulsingCodec; import 
org.apache.lucene.index.codecs.simpletext.SimpleTextCodec; @@ -50,7 +51,7 @@ private final Set knownExtensions = new HashSet(); - public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2"}; + public final static String[] CORE_CODECS = new String[] {"Standard", "Pulsing", "PreFlex", "SimpleText", "PatchedFrameOfRef", "FrameOfRef", "PatchedFrameOfRef2", "BulkVInt"}; public synchronized void register(Codec codec) { if (codec.name == null) { @@ -176,5 +177,6 @@ register(new PatchedFrameOfRefCodec()); register(new FrameOfRefCodec()); register(new PForDeltaFixedIntBlockCodec(128)); + register(new BulkVIntCodec(128)); } }