diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetBlockTermState.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.index.TermState;
+
+/**
+ * Per-term state of the bit-set postings format: the offset in the postings file at which the
+ * term's bit set starts.
+ */
+class BitSetBlockTermState extends BlockTermState {
+  /** Offset in the postings file of the first byte of this term's bit set. */
+  public long startPostingsPosition;
+
+  @Override
+  public void copyFrom(TermState _other) {
+    super.copyFrom(_other);
+    this.startPostingsPosition = ((BitSetBlockTermState) _other).startPostingsPosition;
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetCodec.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetCodec.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetCodec.java
@@ -0,0 +1,41 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.lucene46.Lucene46Codec;
+
+/**
+ * Codec that delegates everything to {@link Lucene46Codec} except postings, which are handled
+ * by {@link BitSetPostingsFormat}.
+ */
+public class BitSetCodec extends FilterCodec {
+
+  // One shared instance: postingsFormat() is called repeatedly and the format holds no state.
+  private final PostingsFormat postingsFormat = new BitSetPostingsFormat();
+
+  public BitSetCodec() {
+    super("BitSetCodec", new Lucene46Codec());
+  }
+
+  @Override
+  public PostingsFormat postingsFormat() {
+    return postingsFormat;
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetDocsEnum.java
@@ -0,0 +1,131 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * Iterates the documents of one term whose postings are stored as a fixed-length bit set: bit
+ * {@code docID & 7} of byte {@code docID >> 3} is set when the document contains the term.
+ * Frequencies are not stored, so {@link #freq()} is always 1.
+ */
+class BitSetDocsEnum extends DocsEnum {
+
+  private final long filePosition;
+  private final Bits liveDocs;
+  private final IndexInput pstClone;
+  private final int docCount;
+  private final int postingsListLength;
+
+  private int currentBytePosition = -1;
+  private int readByte = 0;
+  private int docID = -1;
+
+  /**
+   * @param filePosition offset in {@code pst} of the first byte of the term's bit set
+   * @param liveDocs live documents, or {@code null} when no document has to be skipped
+   * @param pst postings input; it is cloned, so the caller's file pointer is not moved
+   * @param docCount number of documents in the segment, i.e. number of bits in the bit set
+   */
+  BitSetDocsEnum(long filePosition, Bits liveDocs, IndexInput pst, int docCount) throws IOException {
+    this.filePosition = filePosition;
+    this.liveDocs = liveDocs;
+    this.docCount = docCount;
+    // Number of bytes needed to hold docCount bits, i.e. ceil(docCount / 8).
+    this.postingsListLength = (docCount + 7) >>> 3;
+    pstClone = pst.clone();
+    pstClone.seek(filePosition);
+  }
+
+  /** Frequencies are not stored; every matching document reports 1. */
+  @Override
+  public int freq() throws IOException {
+    return 1;
+  }
+
+  @Override
+  public int docID() {
+    return docID;
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    if (docID == NO_MORE_DOCS) {
+      return docID; // already exhausted: never read past the end of the bit set
+    }
+    do {
+      docID++;
+      final int subIndex = docID & 0x7; // index within the readByte
+      if (subIndex == 0) {
+        // docID starts a new byte: load it, or stop when the bit set is exhausted
+        if (++currentBytePosition == postingsListLength) {
+          return docID = NO_MORE_DOCS;
+        }
+        readByte = pstClone.readByte() & 0xff;
+      }
+      int word = readByte >> subIndex; // skip all the bits to the right of index
+      if (word != 0) {
+        docID = docID + Integer.numberOfTrailingZeros(word);
+      } else {
+        // nothing left in this byte: scan forward to the next non-zero byte
+        while (++currentBytePosition < postingsListLength) {
+          readByte = pstClone.readByte() & 0xff;
+          if (readByte != 0) {
+            docID = (currentBytePosition << 3) + Integer.numberOfTrailingZeros(readByte);
+            break;
+          }
+        }
+        if (currentBytePosition == postingsListLength) {
+          return docID = NO_MORE_DOCS;
+        }
+      }
+    } while (!(liveDocs == null || liveDocs.get(docID)));
+    return docID;
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    if (docID == NO_MORE_DOCS || target >= docCount) {
+      return docID = NO_MORE_DOCS;
+    }
+    if (target > 0) {
+      // Position just before target so that nextDoc() examines target first.
+      docID = target - 1;
+      currentBytePosition = docID >> 3;
+      if ((target & 0x7) != 0) {
+        // target lies inside the current byte: load that byte now
+        pstClone.seek(filePosition + currentBytePosition);
+        readByte = pstClone.readByte() & 0xff;
+      } else {
+        // target starts a new byte, which nextDoc() reads itself
+        pstClone.seek(filePosition + currentBytePosition + 1);
+      }
+    }
+    return nextDoc();
+  }
+
+  /** Upper bound on the number of matches: one bit per document of the segment. */
+  @Override
+  public long cost() {
+    return docCount;
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFileWriter.java
@@ -0,0 +1,95 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.store.IndexOutput;
+
+import java.io.IOException;
+
+/**
+ * Writes one postings list as a bit set of {@code ceil(docCount / 8)} bytes: bit
+ * {@code docID & 7} of byte {@code docID >> 3} is set for every document added between
+ * {@link #startPostingsList()} and {@link #finishPostingsList()}.
+ */
+class BitSetPostingsFileWriter {
+
+  private static final byte ZERO = 0;
+  private final IndexOutput pst;
+  private final int postingsListLength;
+  private boolean started = false;
+  private int currentWordPosition = 0;
+  private byte word = 0;
+
+  BitSetPostingsFileWriter(int docCount, IndexOutput pst) {
+    this.pst = pst;
+    // Number of bytes needed to hold docCount bits, i.e. ceil(docCount / 8).
+    this.postingsListLength = (docCount + 7) >>> 3;
+  }
+
+  /** Begins a new postings list; throws if the previous one was not finished. */
+  void startPostingsList() {
+    if (started) {
+      throw new IllegalStateException("Posting writer was started already");
+    }
+    started = true;
+    currentWordPosition = 0;
+  }
+
+  /** Writes the pending byte and zero-pads the list to exactly postingsListLength bytes. */
+  void finishPostingsList() throws IOException {
+    if (!started) {
+      throw new IllegalStateException("Posting writer was not started");
+    }
+    started = false;
+    currentWordPosition++;
+    pst.writeByte(word);
+    word = 0;
+    // currentWordPosition now counts the bytes written for this list, so the bound is
+    // exclusive; an inclusive bound would append one surplus byte to every list.
+    while (currentWordPosition < postingsListLength) {
+      currentWordPosition++;
+      pst.writeByte(ZERO);
+    }
+    pst.flush();
+  }
+
+  /**
+   * Sets the bit of {@code docID}. Documents must arrive in ascending order.
+   *
+   * @throws IllegalArgumentException if {@code docID} belongs to a byte already written
+   */
+  void startDoc(int docID) throws IOException {
+    int wordNumber = docID >> 3;
+    int bitPosition = docID & 0x07;
+    if (currentWordPosition < wordNumber) {
+      // Flush the pending byte, then emit zero bytes for the gap up to wordNumber.
+      currentWordPosition++;
+      pst.writeByte(word);
+      while (currentWordPosition < wordNumber) {
+        currentWordPosition++;
+        pst.writeByte(ZERO);
+      }
+      word = 0;
+      word |= (byte) (1 << bitPosition);
+    } else if (wordNumber == currentWordPosition) {
+      word |= (byte) (1 << bitPosition);
+    } else {
+      throw new IllegalArgumentException("Docs must be added in ascending order.");
+    }
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsFormat.java
@@ -0,0 +1,103 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.codecs.blockterms.BlockTermsReader;
+import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
+import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
+import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
+import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+import java.io.IOException;
+
+/**
+ * Postings format that keeps the terms dictionary in the block-terms structures and stores
+ * every term's postings as a bit set in a separate {@code .pst} file.
+ */
+public class BitSetPostingsFormat extends PostingsFormat {
+
+  static final String POSTINGS_EXTENSION = "pst";
+
+  public BitSetPostingsFormat() {
+    super("BitSetPostingsFormat");
+  }
+
+  /** Returns the name of the postings file of the given segment. */
+  static String getPostingsFileName(String segment, String segmentSuffix) {
+    return IndexFileNames.segmentFileName(segment, segmentSuffix, POSTINGS_EXTENSION);
+  }
+
+  @Override
+  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
+    TermsIndexWriterBase indexWriter = null;
+    BitSetPostingsWriter postingsWriter = null;
+    boolean success = false;
+    try {
+      indexWriter = new FixedGapTermsIndexWriter(state);
+      postingsWriter = new BitSetPostingsWriter(state);
+      FieldsConsumer consumer = new BlockTermsWriter(indexWriter, state, postingsWriter);
+      success = true;
+      return consumer;
+    } finally {
+      if (!success) {
+        // Close whatever was opened before the failure instead of leaking it.
+        IOUtils.closeWhileHandlingException(indexWriter, postingsWriter);
+      }
+    }
+  }
+
+  @Override
+  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
+    PostingsReaderBase postings = null;
+    TermsIndexReaderBase indexReader = null;
+    boolean success = false;
+    try {
+      postings = new BitSetPostingsReader(state);
+      indexReader = new FixedGapTermsIndexReader(state.directory,
+          state.fieldInfos,
+          state.segmentInfo.name,
+          BytesRef.getUTF8SortedAsUnicodeComparator(),
+          state.segmentSuffix,
+          state.context);
+      FieldsProducer producer = new BlockTermsReader(indexReader,
+          state.directory,
+          state.fieldInfos,
+          state.segmentInfo,
+          postings,
+          state.context,
+          state.segmentSuffix);
+      success = true;
+      return producer;
+    } finally {
+      if (!success) {
+        // Close whatever was opened before the failure instead of leaking it.
+        IOUtils.closeWhileHandlingException(postings, indexReader);
+      }
+    }
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsReader.java
@@ -0,0 +1,85 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.PostingsReaderBase;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Bits;
+
+import java.io.IOException;
+
+/**
+ * Reads the postings file written by {@link BitSetPostingsWriter}: each term's metadata is
+ * the file offset of its bit set, which {@link BitSetDocsEnum} then iterates.
+ */
+class BitSetPostingsReader extends PostingsReaderBase {
+
+  private final IndexInput pst;
+  private final int docCount;
+
+  BitSetPostingsReader(SegmentReadState state) throws IOException {
+    // Only the document count is needed later, so the read state itself is not retained.
+    docCount = state.segmentInfo.getDocCount();
+    pst = state.directory.openInput(
+        BitSetPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix),
+        state.context);
+  }
+
+  @Override
+  public void init(IndexInput termsIn) throws IOException {
+  }
+
+  @Override
+  public BlockTermState newTermState() throws IOException {
+    return new BitSetBlockTermState();
+  }
+
+  @Override
+  public void decodeTerm(long[] longs, DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException {
+    assert longs.length == BitSetPostingsWriter.METADATA_LONG_SIZE;
+    // The writer always stores the absolute offset, so "absolute" needs no special handling.
+    ((BitSetBlockTermState) state).startPostingsPosition = in.readVLong();
+  }
+
+  @Override
+  public DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+    BitSetBlockTermState postingTermsState = (BitSetBlockTermState) state;
+    return new BitSetDocsEnum(postingTermsState.startPostingsPosition, skipDocs, pst, docCount);
+  }
+
+  @Override
+  public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+    return null; // positions are never stored by this format
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return 0;
+  }
+
+  @Override
+  public void close() throws IOException {
+    pst.close();
+  }
+}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bits/BitSetPostingsWriter.java
@@ -0,0 +1,106 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.BlockTermState;
+import org.apache.lucene.codecs.PushPostingsWriterBase;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Writes every term's postings to the postings file as a bit set and records the file offset
+ * at which the bit set starts as the term's metadata. Only doc IDs are stored.
+ */
+class BitSetPostingsWriter extends PushPostingsWriterBase {
+  static final int METADATA_LONG_SIZE = 0;
+  private final IndexOutput pst;
+  private final SegmentWriteState state;
+  private BitSetPostingsFileWriter postingsWriter;
+  private long startPostingPosition;
+
+  BitSetPostingsWriter(SegmentWriteState state) throws IOException {
+    String fileName = BitSetPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix);
+    pst = state.directory.createOutput(fileName, state.context);
+    this.state = state;
+  }
+
+  @Override
+  public void startTerm() throws IOException {
+    // Remember where this term's bit set begins; finishTerm() stores it in the term state.
+    startPostingPosition = pst.getFilePointer();
+    postingsWriter.startPostingsList();
+  }
+
+  @Override
+  public void finishTerm(BlockTermState state) throws IOException {
+    postingsWriter.finishPostingsList();
+    if (pst.getFilePointer() - startPostingPosition > 0) {
+      ((BitSetBlockTermState) state).startPostingsPosition = startPostingPosition;
+    }
+  }
+
+  @Override
+  public BlockTermState newTermState() throws IOException {
+    return new BitSetBlockTermState();
+  }
+
+  @Override
+  public void encodeTerm(long[] longs, DataOutput out, FieldInfo fieldInfo, BlockTermState state, boolean absolute) throws IOException {
+    if (FieldInfo.IndexOptions.DOCS_ONLY != fieldInfo.getIndexOptions()) {
+      throw new IllegalArgumentException("Incorrect index option: " + fieldInfo.getIndexOptions() + ". "
+          + "Only " + FieldInfo.IndexOptions.DOCS_ONLY + " is accepted.");
+    }
+    assert longs.length == METADATA_LONG_SIZE;
+    out.writeVLong(((BitSetBlockTermState) state).startPostingsPosition);
+  }
+
+  @Override
+  public int setField(FieldInfo fieldInfo) {
+    super.setField(fieldInfo);
+    postingsWriter = new BitSetPostingsFileWriter(state.segmentInfo.getDocCount(), pst);
+    return METADATA_LONG_SIZE;
+  }
+
+  @Override
+  public void close() throws IOException {
+    pst.close();
+  }
+
+  @Override
+  public void startDoc(int docID, int freq) throws IOException {
+    postingsWriter.startDoc(docID);
+  }
+
+  @Override
+  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+    // positions, payloads and offsets are not stored
+  }
+
+  @Override
+  public void finishDoc() throws IOException {
+  }
+
+  @Override
+  public void init(IndexOutput termsOut) throws IOException {
+  }
+}
diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
--- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -14,3 +14,4 @@
 #  limitations under the License.
 
 org.apache.lucene.codecs.simpletext.SimpleTextCodec
+org.apache.lucene.codecs.bits.BitSetCodec
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSetCodecTest.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSetCodecTest.java
new file mode 100644
--- /dev/null
+++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bits/BitSetCodecTest.java
@@ -0,0 +1,78 @@
+package org.apache.lucene.codecs.bits;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.junit.Ignore;
+
+/**
+ * Runs {@link BasePostingsFormatTestCase} against {@link BitSetCodec}; the tests overridden
+ * below are ignored because the codec does not support them.
+ */
+public class BitSetCodecTest extends BasePostingsFormatTestCase {
+
+  private final Codec codec = new BitSetCodec();
+
+  @Override
+  protected Codec getCodec() {
+    return codec;
+  }
+
+  @Override
+  @Ignore
+  public void testDocsAndFreqs() throws Exception {
+    super.testDocsAndFreqs(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testDocsAndFreqsAndPositions() throws Exception {
+    super.testDocsAndFreqsAndPositions(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testDocsAndFreqsAndPositionsAndPayloads() throws Exception {
+    super.testDocsAndFreqsAndPositionsAndPayloads(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testDocsAndFreqsAndPositionsAndOffsets() throws Exception {
+    super.testDocsAndFreqsAndPositionsAndOffsets(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testDocsAndFreqsAndPositionsAndOffsetsAndPayloads() throws Exception {
+    super.testDocsAndFreqsAndPositionsAndOffsetsAndPayloads(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testRandom() throws Exception {
+    super.testRandom(); // not supported
+  }
+
+  @Override
+  @Ignore
+  public void testInvertedWrite() throws Exception {
+    super.testInvertedWrite(); // not supported
+  }
+}