Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java (revision )
@@ -0,0 +1,28 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * Per-term state for the bit-set postings format: adds the position in the
+ * postings file at which the term's bit set starts.
+ *
+ * @author ngracheva
+ */
+public class BitSetBlockTermState extends BlockTermState {
+
+  /** File pointer in the postings file where this term's bit set begins. */
+  public long startPostingsPosition;
+
+  /**
+   * Copies the base state and the postings start position from {@code _other}.
+   *
+   * @param _other state to copy from; it is cast to {@link BitSetBlockTermState}
+   */
+  @Override
+  public void copyFrom(TermState _other) {
+    super.copyFrom(_other);
+    this.startPostingsPosition = ((BitSetBlockTermState) _other).startPostingsPosition;
+  }
+
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java (revision )
@@ -0,0 +1,109 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Postings writer that stores each term's documents as a bit set in a
+ * separate postings file. Positions, payloads and offsets are not written.
+ *
+ * @author ngracheva
+ */
+class BitSetPostingsWriter extends PushPostingsWriterBase {
+  /** Number of metadata longs per term; the start position is written as a vLong instead. */
+  public static final int METADATA_LONG_SIZE = 0;
+
+  private final IndexOutput pst;
+  private final SegmentWriteState state;
+  private BitSetPostingsFileWriter postingsWriter;
+  private long startPostingPosition;
+
+  /**
+   * Creates the postings output for the segment described by {@code state}.
+   *
+   * @param state supplies the directory, segment name, segment suffix and IO context
+   * @throws IOException if the postings file cannot be created
+   */
+  public BitSetPostingsWriter(SegmentWriteState state) throws IOException {
+    String fileName = BitSetPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix);
+    pst = state.directory.createOutput(fileName, state.context);
+    this.state = state;
+  }
+
+  /** Remembers where the term's bit set starts and opens a new postings list. */
+  @Override
+  public void startTerm() throws IOException {
+    startPostingPosition = pst.getFilePointer();
+    postingsWriter.startPostingsList();
+  }
+
+  /** Closes the current postings list and stores its start position in {@code state}. */
+  @Override
+  public void finishTerm(BlockTermState state) throws IOException {
+    postingsWriter.finishPostingsList();
+    // Record the position only when bytes were written for this term.
+    if (pst.getFilePointer() - startPostingPosition > 0) {
+      ((BitSetBlockTermState) state).startPostingsPosition = startPostingPosition;
+    }
+  }
+
+  @Override
+  public BlockTermState newTermState() throws IOException {
+    return new BitSetBlockTermState();
+  }
+
+  /** Writes the term's postings start position to {@code out} as a vLong; {@code absolute} is ignored. */
+  @Override
+  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException {
+    assert longs.length == METADATA_LONG_SIZE;
+    out.writeVLong(((BitSetBlockTermState) state).startPostingsPosition);
+    assert fieldInfo.getIndexOptions() == FieldInfo.IndexOptions.DOCS_ONLY ? state.totalTermFreq == -1 : state.totalTermFreq != -1;
+  }
+
+  /** Creates a new bit-set writer for the field, sized by the segment's document count. */
+  @Override
+  public int setField(FieldInfo fieldInfo) {
+    super.setField(fieldInfo);
+    postingsWriter = new BitSetPostingsFileWriter(state.segmentInfo.getDocCount(), pst);
+    return METADATA_LONG_SIZE;
+  }
+
+  /** Writes the {@code "END"} marker and closes the postings output. */
+  @Override
+  public void close() throws IOException {
+    try {
+      pst.writeString("END");
+    } finally {
+      pst.close();
+    }
+  }
+
+  /** Sets the bit for {@code docID} in the current postings list. */
+  @Override
+  public void startDoc(int docID, int freq) throws IOException {
+    postingsWriter.startDoc(docID, freq);
+  }
+
+  /** No-op: this writer does not store positions, payloads or offsets. */
+  @Override
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+  }
+
+  /** No-op: the document's bit was already set in {@link #startDoc}. */
+  @Override
+  public void finishDoc() throws IOException {
+  }
+
+  /** No-op: nothing is written to the terms output on initialization. */
+  @Override
+  public void init(IndexOutput termsOut) throws IOException {
+  }
+
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java (revision )
@@ -0,0 +1,97 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.store.IndexOutput;
+
+import java.io.IOException;
+
+/**
+ * Writes one postings list as a bit set: one bit per document, least
+ * significant bit first within each byte, padded to {@code ceil(docCount / 8)} bytes.
+ *
+ * @author ngracheva
+ */
+public class BitSetPostingsFileWriter {
+
+  public static final byte ZERO = 0;
+  private final IndexOutput pst;
+  private final int docCount;
+  /** Byte currently being assembled; not yet written to the output. */
+  private byte word = 0;
+  private boolean started = false;
+  /** Index of the byte being assembled; equals the bytes already written for the current list. */
+  private int currentWordPosition = 0;
+
+  /**
+   * @param docCount number of documents in the segment; determines the bit set length
+   * @param pst output the bit sets are appended to
+   */
+  public BitSetPostingsFileWriter(int docCount, IndexOutput pst) {
+    this.pst = pst;
+    this.docCount = docCount;
+  }
+
+  /**
+   * Begins a new postings list.
+   *
+   * @throws IllegalStateException if the previous list was not finished
+   */
+  public void startPostingsList() {
+    if (started) {
+      throw new IllegalStateException("Posting writer was started already");
+    }
+    started = true;
+    currentWordPosition = 0;
+  }
+
+  /**
+   * Writes the pending byte and zero-pads the list to exactly
+   * {@code ceil(docCount / 8)} bytes, the length the reader scans.
+   *
+   * @throws IllegalStateException if no list was started
+   * @throws IOException if writing to the output fails
+   */
+  public void finishPostingsList() throws IOException {
+    if (!started) {
+      throw new IllegalStateException("Posting writer was not started");
+    }
+    started = false;
+    currentWordPosition++;
+    pst.writeByte(word);
+    word = 0;
+    int postingsListLength = (docCount >> 3) + ((docCount & 0x07) == 0 ? 0 : 1);
+    // Strict "<": the former "<=" bound appended one surplus byte to every list.
+    while (currentWordPosition < postingsListLength) {
+      currentWordPosition++;
+      pst.writeByte(ZERO);
+    }
+    // No flush per term: it defeats output buffering; the data is flushed when the output is closed.
+  }
+
+  /**
+   * Sets the bit for {@code docID}, first writing out the completed byte and
+   * zero bytes for any gap before the byte that holds the bit.
+   *
+   * @param docID document to add; must not belong to an earlier byte than the previous document
+   * @param freq ignored
+   * @throws IllegalArgumentException if {@code docID} belongs to an already written byte
+   * @throws IOException if writing to the output fails
+   */
+  public void startDoc(int docID, int freq) throws IOException {
+    int wordNumber = docID >> 3;
+    int bitPosition = docID & 0x07;
+    if (wordNumber < currentWordPosition) {
+      throw new IllegalArgumentException("Docs must be added in ascending order.");
+    }
+    if (currentWordPosition < wordNumber) {
+      currentWordPosition++;
+      pst.writeByte(word);
+      while (currentWordPosition < wordNumber) {
+        currentWordPosition++;
+        pst.writeByte(ZERO);
+      }
+      word = 0;
+    }
+    word |= (byte) (1 << bitPosition);
+  }
+
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSecCodecTest.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSecCodecTest.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSecCodecTest.java (revision )
@@ -0,0 +1,56 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/**
+ * Tests {@link BitSetPostingsFormat}. The format supports docs-only postings,
+ * so the inherited test cases listed below are disabled with empty overrides.
+ *
+ * @author ngracheva
+ */
+public class BitSecCodecTest extends BasePostingsFormatTestCase {
+
+  private final Codec codec = TestUtil.alwaysPostingsFormat(new BitSetPostingsFormat());
+
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
+
+  @Override
+  public void testDocsAndFreqs() throws Exception {
+    // Disabled: needs more than docs-only postings.
+  }
+
+  @Override
+  public void testDocsAndFreqsAndPositions() throws Exception {
+    // Disabled: needs more than docs-only postings.
+  }
+
+  @Override
+  public void testDocsAndFreqsAndPositionsAndPayloads() throws Exception {
+    // Disabled: needs more than docs-only postings.
+  }
+
+  @Override
+  public void testDocsAndFreqsAndPositionsAndOffsets() throws Exception {
+    // Disabled: needs more than docs-only postings.
+  }
+
+  @Override
+  public void testDocsAndFreqsAndPositionsAndOffsetsAndPayloads() throws Exception {
+    // Disabled: needs more than docs-only postings.
+  }
+
+  @Override
+  public void testRandom() throws Exception {
+    // Disabled. NOTE(review): the reason is not documented - confirm.
+  }
+
+  @Override
+  public void testInvertedWrite() throws Exception {
+    // Disabled. NOTE(review): the reason is not documented - confirm.
+  }
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (revision 69c5dc76e9ecd06ac23192c0075c1d3692f175a4)
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (revision )
@@ -22,3 +22,4 @@
 org.apache.lucene.codecs.memory.FSTOrdPulsing41PostingsFormat
 org.apache.lucene.codecs.memory.FSTPostingsFormat
 org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
+org.apache.lucene.codecs.bits.BitSetPostingsFormat
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java (revision )
@@ -0,0 +1,75 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.index.*;
+import org.apache.lucene.store.*;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * Reads postings written as bit sets: each term's state carries the file
+ * position of its bit set in the postings file.
+ *
+ * @author ngracheva
+ */
+public class BitSetPostingsReader extends PostingsReaderBase {
+
+  private final IndexInput pst;
+  private final SegmentReadState segmentReadState;
+
+  /**
+   * Opens the postings file of the segment described by {@code state}.
+   *
+   * @param state supplies the directory, segment name, segment suffix and IO context
+   * @throws IOException if the postings file cannot be opened
+   */
+  public BitSetPostingsReader(SegmentReadState state) throws IOException {
+    this.segmentReadState = state;
+    pst = state.directory.openInput(
+        BitSetPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix),
+        state.context);
+  }
+
+  /** No-op: nothing is read from the terms input on initialization. */
+  @Override
+  public void init(IndexInput termsIn) throws IOException {
+  }
+
+  @Override
+  public BlockTermState newTermState() throws IOException {
+    return new BitSetBlockTermState();
+  }
+
+  /** Reads the term's postings start position from {@code in} as a vLong. */
+  @Override
+  public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException {
+    assert longs.length == BitSetPostingsWriter.METADATA_LONG_SIZE;
+    ((BitSetBlockTermState) state).startPostingsPosition = in.readVLong();
+  }
+
+  /** Returns a new enum over the term's bit set; {@code reuse} and {@code flags} are ignored. */
+  @Override
+  public DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+    BitSetBlockTermState postingTermsState = (BitSetBlockTermState) state;
+    return new BitSetDocsEnum(postingTermsState.startPostingsPosition, skipDocs, pst, segmentReadState.segmentInfo.getDocCount());
+  }
+
+  /** Always returns {@code null}: no positions enum is provided by this reader. */
+  @Override
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+    return null;
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return 0;
+  }
+
+  @Override
+  public void close() throws IOException {
+    pst.close();
+  }
+
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java (revision )
@@ -0,0 +1,89 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.blockterms.*;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.IOException;
+
+/**
+ * Postings format that keeps the terms dictionary in block terms with a
+ * fixed-gap terms index and stores postings as bit sets in a {@code .pst} file.
+ *
+ * @author ngracheva
+ */
+public class BitSetPostingsFormat extends PostingsFormat {
+
+  static final String POSTINGS_EXTENSION = "pst";
+
+  public BitSetPostingsFormat() {
+    super("BitSetPostingsFormat");
+  }
+
+  /** Returns the postings file name for the given segment name and suffix. */
+  public static String getPostingsFileName(String segment, String segmentSuffix) {
+    return IndexFileNames.segmentFileName(segment, segmentSuffix, POSTINGS_EXTENSION);
+  }
+
+  /**
+   * Creates the writer chain. Writers that were already opened are closed if
+   * a later step throws, so no open output is leaked.
+   */
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    TermsIndexWriterBase indexWriter = null;
+    BitSetPostingsWriter postingsWriter = null;
+    boolean success = false;
+    try {
+      indexWriter = new FixedGapTermsIndexWriter(state);
+      postingsWriter = new BitSetPostingsWriter(state);
+      FieldsConsumer consumer = new BlockTermsWriter(indexWriter, state, postingsWriter);
+      success = true;
+      return consumer;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(indexWriter, postingsWriter);
+      }
+    }
+  }
+
+  /**
+   * Creates the reader chain. Readers that were already opened are closed if
+   * a later step throws, so no open input is leaked.
+   */
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postings = null;
+    TermsIndexReaderBase indexReader = null;
+    boolean success = false;
+    try {
+      postings = new BitSetPostingsReader(state);
+      indexReader = new FixedGapTermsIndexReader(state.directory,
+          state.fieldInfos,
+          state.segmentInfo.name,
+          BytesRef.getUTF8SortedAsUnicodeComparator(),
+          state.segmentSuffix,
+          state.context);
+      FieldsProducer producer = new BlockTermsReader(indexReader,
+          state.directory,
+          state.fieldInfos,
+          state.segmentInfo,
+          postings,
+          state.context,
+          state.segmentSuffix);
+      success = true;
+      return producer;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(postings, indexReader);
+      }
+    }
+  }
+}
Index: ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java (revision )
+++ ../../work/lucenetrunkGit/lucene-solr/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java (revision )
@@ -0,0 +1,135 @@
+package org.apache.lucene.codecs.bits;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * Iterates over the documents of one term by scanning its bit set in the
+ * postings file. Idea is taken from OpenBitSet.
+ *
+ * @author ngracheva
+ */
+public class BitSetDocsEnum extends DocsEnum {
+
+  /** Index, within the term's bit set, of the last byte read; -1 before the first read. */
+  private int currentBytePosition = -1;
+  private byte word = 0;
+  private int docID = -1;
+  private final int docCount;
+  /** Number of bytes in the bit set, i.e. {@code ceil(docCount / 8)}. */
+  private final int postingsListLength;
+  /** Not yet returned set bits of the current byte, one hex nibble each (see {@link #bitlist}). */
+  private int indexArray = 0;
+  private final IndexInput pstClone;
+  private final Bits liveDocs;
+
+  /**
+   * @param filePosition position in {@code pst} where the term's bit set starts
+   * @param liveDocs documents to return, or {@code null} to return all
+   * @param pst postings input; it is cloned and the clone is positioned at {@code filePosition}
+   * @param docCount number of documents in the segment
+   * @throws IOException if seeking the cloned input fails
+   */
+  public BitSetDocsEnum(long filePosition, Bits liveDocs, IndexInput pst, int docCount) throws IOException {
+    pstClone = pst.clone();
+    pstClone.seek(filePosition);
+    this.docCount = docCount;
+    this.liveDocs = liveDocs;
+    postingsListLength = (docCount >> 3) + ((docCount & 0x07) == 0 ? 0 : 1);
+  }
+
+  /** Always 1: frequencies are not read by this enum. */
+  @Override
+  public int freq() throws IOException {
+    return 1;
+  }
+
+  @Override
+  public int docID() {
+    return docID;
+  }
+
+  /**
+   * Advances to the next set bit whose document passes {@code liveDocs}.
+   *
+   * @return the next document, or {@code NO_MORE_DOCS} once the bit set is exhausted
+   */
+  @Override
+  public int nextDoc() throws IOException {
+    do {
+      if (docID == NO_MORE_DOCS) {
+        return docID;
+      }
+      if (indexArray == 0) {
+        // No decoded bits left: scan forward to the next non-zero byte.
+        while (word == 0) {
+          if (currentBytePosition + 1 >= postingsListLength) {
+            return docID = NO_MORE_DOCS;
+          }
+          currentBytePosition++;
+          word = pstClone.readByte();
+        }
+        indexArray = bitlist[word & 0xff];
+        word = 0; // everything needed from the byte is now in indexArray
+      }
+      // The lowest nibble is the 1-based index of the lowest remaining set bit.
+      int bitIndex = (indexArray & 0x0f) - 1;
+      indexArray >>>= 4;
+      docID = (currentBytePosition << 3) + bitIndex;
+    } while (liveDocs != null && !liveDocs.get(docID)); // skip documents that are not live
+    return docID;
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    return slowAdvance(target); // known issue
+  }
+
+  // NOTE(review): constant estimate, unrelated to the number of matching documents - confirm intent.
+  @Override
+  public long cost() {
+    return 1;
+  }
+
+  /**
+   * For every byte value, the 1-based indexes of its set bits packed as hex
+   * nibbles, lowest bit in the lowest nibble (e.g. {@code 0x05 -> 0x31}).
+   */
+  public final static int[] bitlist = {
+      0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43,
+      0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321,
+      0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62,
+      0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643,
+      0x6431, 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532,
+      0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321,
+      0x7, 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742,
+      0x7421, 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753,
+      0x7531, 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431,
+      0x75432, 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632,
+      0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321,
+      0x765, 0x7651, 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654,
+      0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8,
+      0x81, 0x82, 0x821, 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421,
+      0x843, 0x8431, 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531,
+      0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432,
+      0x854321, 0x86, 0x861, 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864,
+      0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651,
+      0x8652, 0x86521, 0x8653, 0x86531, 0x86532, 0x865321, 0x8654, 0x86541,
+      0x86542, 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87, 0x871,
+      0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321, 0x874, 0x8741, 0x8742,
+      0x87421, 0x8743, 0x87431, 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521,
+      0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541, 0x87542, 0x875421,
+      0x87543, 0x875431, 0x875432, 0x8754321, 0x876, 0x8761, 0x8762, 0x87621,
+      0x8763, 0x87631, 0x87632, 0x876321, 0x8764, 0x87641, 0x87642, 0x876421,
+      0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, 0x87651, 0x87652, 0x876521,
+      0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, 0x876541, 0x876542,
+      0x8765421, 0x876543, 0x8765431, 0x8765432, 0x87654321
+  };
+}