Index: contrib/kamikaze/build.xml =================================================================== --- contrib/kamikaze/build.xml (revision 0) +++ contrib/kamikaze/build.xml (revision 0) @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Kamikaze]]> + + + + + + + + + + + + + Index: contrib/kamikaze/ivy.xml =================================================================== --- contrib/kamikaze/ivy.xml (revision 0) +++ contrib/kamikaze/ivy.xml (revision 0) @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + Index: contrib/kamikaze/version.properties =================================================================== --- contrib/kamikaze/version.properties (revision 0) +++ contrib/kamikaze/version.properties (revision 0) @@ -0,0 +1 @@ +version=2.0.0 \ No newline at end of file Index: contrib/kamikaze/lib/master/log4j.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: contrib/kamikaze/lib/master/log4j.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: contrib/kamikaze/lib/master/lucene-core.jar =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Property changes on: contrib/kamikaze/lib/master/lucene-core.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: contrib/kamikaze/lib/test/junit.jar =================================================================== Cannot display: file marked as a binary type. 
svn:mime-type = application/octet-stream Property changes on: contrib/kamikaze/lib/test/junit.jar ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/DocSet.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/DocSet.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/DocSet.java (revision 0) @@ -0,0 +1,66 @@ +package org.apache.lucene.kamikaze.docidset.api; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSet; + + +/** + * Represents a sorted integer set. + * Only addDoc is abstract; the other methods declared here have default bodies + * that return a fixed value (-1 or 0) or do nothing. + */ + +public abstract class DocSet extends DocIdSet +{ + /** + * Add a doc id to the set + * @param docid the doc id to add + */ + public abstract void addDoc(int docid) throws IOException; + + + /** + * Check whether a value is present in the set. + * Delegates to findWithIndex and reports true when the returned index is greater than -1. + * @param val value to look for + * @return true if present, false otherwise + */ + public boolean find(int val) throws IOException + { + return findWithIndex(val)>-1?true:false; + } + + /** + * Find the index of a value in the set. + * This default implementation always returns -1. + * @param val value to look for + * @return index if present, -1 otherwise + */ + public int findWithIndex(int val) throws IOException + { + return -1; + } + + /** + * Gets the number of ids in the set. + * This default implementation always returns 0. + * @return size of the docset + */ + public int size() throws IOException + { + return 0; + } + + /** + * Return the set size in bytes. + * This default implementation always returns 0. + * @return size of the set in bytes + */ + public long sizeInBytes() throws IOException + { + return 0; + } + + /** + * Optimize by trimming underlying data structures. + * This default implementation does nothing. + */ + public void optimize() throws IOException + { + return; + } + + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/StatefulDSIterator.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/StatefulDSIterator.java (revision 0) +++ 
contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/api/StatefulDSIterator.java (revision 0) @@ -0,0 +1,34 @@ +package org.apache.lucene.kamikaze.docidset.api; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * NOTICE : This classes currently does not exist in any release branch for Lucene until 2.3.2, + * once they are available in a stable release branch, we will eliminate these classes and depend + * on the Lucene release jar directly. + * DATE : 07/14/08 + * + */ + + + +/** + * This abstract class defines methods to iterate over a set of + * non-decreasing doc ids. 
+ */ +public abstract class StatefulDSIterator extends org.apache.lucene.search.DocIdSetIterator{ + /** NOTE(review): presumably reports the iterator's current position within the set; the exact contract is not visible here -- confirm against implementations. */ abstract public int getCursor(); +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/bitset/MyOpenBitSet.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/bitset/MyOpenBitSet.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/bitset/MyOpenBitSet.java (revision 0) @@ -0,0 +1,39 @@ +package org.apache.lucene.kamikaze.docidset.bitset; + +import java.io.Serializable; + +import org.apache.lucene.util.OpenBitSet; + +/** OpenBitSet subclass that is Serializable and adds fastSetAs for OR-ing a bit value into a given position. */ public class MyOpenBitSet extends OpenBitSet implements Serializable +{ + private static final long serialVersionUID = 1L; + + public MyOpenBitSet() + { + super(); + } + + public MyOpenBitSet(long numBits) + { + super(numBits); + } + + + + /** OR the given bit value into the bit at the specified index. + * The backing word is picked with index >> 6 and the bit position is the low six bits of index; + * bitVal is shifted to that position and OR-ed in, so a bitVal of 0 leaves the bit as it was + * (this method cannot clear a bit that is already set). + * No explicit bounds check is made on the word index in this method. + * Note: The value for the bitVal is not checked for 0/1, hence incorrect values passed + * lead to unexpected results + * + * @param index position of the bit to update + * @param bitVal bit value to OR in, expected to be 0 or 1 + */ + public void fastSetAs(long index, int bitVal) + { + + int wordNum = (int)(index >> 6); + int bit = (int)index & 0x3f; + long bitmask = ((long)bitVal) << bit; + bits[wordNum] |= bitmask; + + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/CompressedSortedIntegerSegment.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/CompressedSortedIntegerSegment.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/CompressedSortedIntegerSegment.java (revision 0) @@ -0,0 +1,17 @@ +package org.apache.lucene.kamikaze.docidset.compression; + +import java.util.BitSet; + +import org.apache.lucene.util.OpenBitSet; + +public interface CompressedSortedIntegerSegment { + + public OpenBitSet compress(int[] inputSet) throws IllegalArgumentException; + + public 
long[] compressAlt(int[] inputSet) throws IllegalArgumentException; + + public int[] decompress(BitSet packedSet) throws IllegalArgumentException; + + public int[] decompress(OpenBitSet packedSet); + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSet.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSet.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSet.java (revision 0) @@ -0,0 +1,201 @@ +package org.apache.lucene.kamikaze.docidset.compression; + +import java.io.Serializable; +import java.util.BitSet; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.util.OpenBitSet; + + +/** + * Implementation of the p4delta algorithm for sorted integer arrays based on + * + * 1. Original Algorithm from + * http://homepages.cwi.nl/~heman/downloads/msthesis.pdf 2. Optimization and + * variation from http://www2008.org/papers/pdf/p387-zhangA.pdf + * + * This class is a wrapper around a CompressedSegment based on Lucene OpenBitSet + */ +@Deprecated +public class P4DSet implements CompressedSortedIntegerSegment, Serializable { + + private static final long serialVersionUID = 1L; + + private static int INVALID = -1; + + // Maximum bits that can be used = 32 + + // Byte Mask + private static int BYTE_MASK = 8; + + // 32 bits for retaining base value + private static int BASE_MASK = 32; + + // Header size + private static int HEADER_MASK = BYTE_MASK + BASE_MASK; + + // Parameters for the compressed set + private int _b = INVALID; + + private int _base = INVALID; + + private int _batchSize = INVALID; + + private int _exceptionCount = INVALID; + + private int _exceptionOffset = INVALID; + + private int[] op = null; + + public void setParam(int base, int b, int batchSize, int exceptionCount) { + this._base = base; + this._b = b; + this._batchSize = batchSize; + 
this._exceptionCount = exceptionCount; + this._exceptionOffset = HEADER_MASK + _b * _batchSize; + op = new int[_batchSize]; + } + + /** + * P4D compression algorithm + * + * @param input + * @return + * @throws IllegalArgumentException + */ + /** + * Alternate implementation for compress + * + * @param input + * @return compressed bitset + * @throws IllegalArgumentException + */ + public OpenBitSet compress(int[] input) throws IllegalArgumentException { + if (_base == INVALID || _b == INVALID) + throw new IllegalArgumentException(" Codec not initialized correctly "); + + int BATCH_MAX = 1 << (_b - 1); + // int validCount = (_batchSize - _exceptionCount)*_b +SIZE_MASK+BASE_MASK; + + // Compression mumbo jumbo + + // Set Size -b+base+compressedSet+exception*BASE_MASK + MyOpenBitSet compressedSet = new MyOpenBitSet((_batchSize) * _b + + HEADER_MASK + _exceptionCount * (BASE_MASK)); + + // Load the b + copyBits(compressedSet, _b, 0, BYTE_MASK); + + // copy the base value to BASE_MASK offset + copyBits(compressedSet, _base, BYTE_MASK, BASE_MASK); + + // Offset is the offset of the next location to place the value + int offset = BYTE_MASK + BASE_MASK; + int exceptionOffset = _exceptionOffset; + int exceptionIndex = 0; + + // 1. Walk the list + // TODO : Optimize this process. 
+ for (int i = 0; i < _batchSize; i++) { + // else copy in the end + if (input[i] < BATCH_MAX) { + copyBits(compressedSet, input[i], offset, _b); + + } else { + // Copy the value to the exception location + // Add a bit marker to place + copyBits(compressedSet, 1 << (_b - 1) | exceptionIndex++, offset, _b); + + // Copy the patch value to patch offset location + copyBits(compressedSet, input[i], exceptionOffset, BASE_MASK); + + // reset exceptionDelta + exceptionOffset += BASE_MASK; + } + + offset += _b; + } + return compressedSet; + } + + private void copyBits(MyOpenBitSet compressedSet, int val, int offset, + int length) { + for (int i = 0; i < length; i++) + compressedSet.fastSetAs(offset + i, val >> i & 1); + } + + // Method to allow iteration in decompressed form + public int get(OpenBitSet compressedSet, int index) { + int retVal = 0; + // This is an exception + if (compressedSet.getBit((index + 1) * _b + HEADER_MASK - 1) == 1) { + + // Get the exception index + for (int j = 0; j < _b - 1; j++) { + // if(compressedSet.fastGet(i*_b+j+header)) + retVal |= (compressedSet.getBit(index * _b + j + HEADER_MASK) << j); + } + + int exOffset = _exceptionOffset + retVal * BASE_MASK; + retVal = 0; + // Get the actual value + for (int j = 0; j < BASE_MASK; j++) { + // if(compressedSet.fastGet(i*_b+j+header)) + retVal |= (compressedSet.getBit(exOffset + j) << j); + } + return retVal; + } else { + for (int j = 0; j < _b - 1; j++) { + // if(compressedSet.fastGet(i*_b+j+header)) + retVal |= (compressedSet.getBit(index * _b + j + HEADER_MASK) << j); + } + return retVal; + + } + + } + + public int[] decompress(OpenBitSet compressedSet) { + // reuse o/p + op[0] = _base; + + // Offset of the exception list + int exceptionOffset = HEADER_MASK + _b * _batchSize; + + // explode and patch + for (int i = 1; i < _batchSize; i++) { + // This is an exception + if (compressedSet.getBit((i + 1) * _b + HEADER_MASK - 1) == 1) { + for (int j = 0; j < BASE_MASK; j++) { + // 
if(compressedSet.fastGet(i*_b+j+header)) + op[i] |= (compressedSet.getBit(exceptionOffset + j) << j); + } + + exceptionOffset += BASE_MASK; + + } else { + for (int j = 0; j < _b - 1; j++) { + // if(compressedSet.fastGet(i*_b+j+header)) + op[i] |= (compressedSet.getBit(i * _b + j + HEADER_MASK) << j); + } + + } + op[i] += op[i - 1]; + } + return op; + } + + /** + * Method not supported + * + */ + public int[] decompress(BitSet compressedSet) throws IllegalArgumentException { + return null; + } + + public long[] compressAlt(int[] inputSet) throws IllegalArgumentException { + // TODO Auto-generated method stub + return null; + } + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSetNoBase.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSetNoBase.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/compression/P4DSetNoBase.java (revision 0) @@ -0,0 +1,383 @@ +package org.apache.lucene.kamikaze.docidset.compression; + +import java.io.Serializable; +import java.util.BitSet; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.util.OpenBitSet; + + +/** + * Implementation of the p4delta algorithm for sorted integer arrays based on + * + * 1. Original Algorithm from + * http://homepages.cwi.nl/~heman/downloads/msthesis.pdf 2. 
Optimization and + * variation from http://www2008.org/papers/pdf/p387-zhangA.pdf + * + * This class is a wrapper around a CompressedSegment based on Lucene OpenBitSet + */ +public class P4DSetNoBase implements CompressedSortedIntegerSegment, + Serializable { + + private static final long serialVersionUID = 1L; + + private static final int INVALID = -1; + + // Maximum bits that can be used = 32 + + // Byte Mask + private static final int BYTE_MASK = 8; + + // 32 bits for retaining base value + private static final int BASE_MASK = 32; + + // Header size + private static final int HEADER_MASK = BYTE_MASK; + + // Parameters for the compressed set + private int _b = INVALID; + + private int _base = INVALID; + + private int _batchSize = INVALID; + + private int _exceptionCount = INVALID; + + private int _exceptionOffset = INVALID; + + //private int[] op = null; + + + interface Processor extends Serializable + { + public int process(int retval, int exceptionOffset, long[] compressedSet); + }; + + + private static final Processor valueproc[] = + + { + new Processor() { + public final int process(int retVal, int exceptionOffset, long[] compressedSet){ + return retVal; + } + + }, + + new Processor(){ + + public final int process(int retVal, int exceptionOffset, long[] compressedSet){ + // Get the actual value + return getBitSlice(compressedSet, exceptionOffset + retVal * BASE_MASK, BASE_MASK); + } + + } + + }; + + + public void setParam(int base, int b, int batchSize, int exceptionCount) { + this._base = base; + this._b = b; + this._batchSize = batchSize; + + this._exceptionCount = exceptionCount; + this._exceptionOffset = HEADER_MASK + _b * _batchSize; + + + } + + public void updateParams(MyOpenBitSet set) { + _b = getBitSlice(set, 0, BYTE_MASK); + + _exceptionOffset = HEADER_MASK + _b * _batchSize; + } + + public void updateParams(long[] set) { + + _b = getBitSlice(set, 0, BYTE_MASK); + + _exceptionOffset = HEADER_MASK + _b * _batchSize; + } + + + /** + * Alternate 
implementation for compress + * + * @param input + * @return compressed bitset + * @throws IllegalArgumentException + */ + public OpenBitSet compress(int[] input) throws IllegalArgumentException { + + if (_base == INVALID || _b == INVALID) + throw new IllegalArgumentException(" Codec not initialized correctly "); + + + + int BATCH_MAX = 1 << (_b - 1); + // int validCount = (_batchSize - _exceptionCount)*_b +SIZE_MASK+BASE_MASK; + + // Compression mumbo jumbo + + // Set Size -b+base+compressedSet+exception*BASE_MASK + + MyOpenBitSet compressedSet = new MyOpenBitSet((_batchSize) * _b + + HEADER_MASK + _exceptionCount * (BASE_MASK)); + + // System.out.println("Compressed Set Size : " + compressedSet.capacity()); + + + // Load the b + copyBits(compressedSet, _b, 0, BYTE_MASK); + + // copy the base value to BASE_MASK offset + // copyBits(compressedSet, _base, BYTE_MASK, BASE_MASK); + + // Offset is the offset of the next location to place the value + int offset = HEADER_MASK; + int exceptionOffset = _exceptionOffset; + int exceptionIndex = 0; + + // 1. Walk the list + // TODO : Optimize this process. 
+ for (int i = 0; i < _batchSize; i++) { + // else copy in the end + if (input[i] < BATCH_MAX) { + copyBits(compressedSet, input[i] << 1, offset, _b); + + } else { + // Copy the value to the exception location + // Add a bit marker to place + copyBits(compressedSet, ((exceptionIndex << 1) | 0x1), offset, _b); + // System.out.println("Adding Exception + // Marker:"+(BATCH_MAX|(exceptionIndex-1)) + " at offset:"+offset); + + // Copy the patch value to patch offset location + copyBits(compressedSet, input[i], exceptionOffset, BASE_MASK); + + // reset exceptionDelta + exceptionOffset += BASE_MASK; + exceptionIndex++; + } + + offset += _b; + } + + return compressedSet; + } + + /** + * Alternate implementation for compress + * + * @param input + * @return comprssed set in long array form + * @throws IllegalArgumentException + */ + public long[] compressAlt(int[] input) throws IllegalArgumentException { + + if (_base == INVALID || _b == INVALID) + throw new IllegalArgumentException(" Codec not initialized correctly "); + + + /*for(int i=0;i<_batchSize;i++) + System.out.print(input[i]+":"); + System.out.println("\nB:"+_b)*/ + + + int BATCH_MAX = 1 << (_b - 1); + // int validCount = (_batchSize - _exceptionCount)*_b +SIZE_MASK+BASE_MASK; + + // Compression mumbo jumbo + + // Set Size _b+base+compressedSet+exception*BASE_MASK bits + long[] compressedSet = new long[((((_batchSize) * _b + HEADER_MASK + _exceptionCount * (BASE_MASK)))>>>6)+1]; + + + //new long[((_batchSize) * _b + HEADER_MASK + _exceptionCount * (BASE_MASK))>>>6 + 1]; + // System.out.println("Compressed Set Size : " + compressedSet.capacity()); + + + // Load the b + copyBits(compressedSet, _b, 0, BYTE_MASK); + + // copy the base value to BASE_MASK offset + // copyBits(compressedSet, _base, BYTE_MASK, BASE_MASK); + + // Offset is the offset of the next location to place the value + int offset = HEADER_MASK; + int exceptionOffset = _exceptionOffset; + int exceptionIndex = 0; + + // 1. 
Walk the list + // TODO : Optimize this process. + for (int i = 0; i < _batchSize; i++) { + // else copy in the end + if (input[i] < BATCH_MAX) { + copyBits(compressedSet, input[i] << 1, offset, _b); + + } else { + // Copy the value to the exception location + // Add a bit marker to place + copyBits(compressedSet, ((exceptionIndex << 1) | 0x1), offset, _b); + // System.out.println("Adding Exception + // Marker:"+(BATCH_MAX|(exceptionIndex-1)) + " at offset:"+offset); + + // Copy the patch value to patch offset location + copyBits(compressedSet, input[i], exceptionOffset, BASE_MASK); + + // reset exceptionDelta + exceptionOffset += BASE_MASK; + exceptionIndex++; + } + + offset += _b; + } + + return compressedSet; + } + + static private void copyBits(MyOpenBitSet compressedSet, int val, int offset, int length) { + final long[] bits = compressedSet.getBits(); + final int index = offset >>> 6; + final int skip = offset & 0x3f; + val &= (0xffffffff >>> (32 - length)); + bits[index] |= (((long)val) << skip); + if (64 - skip < length) { + bits[index + 1] |= ((long)val >>> (64 - skip)); + } + + } + + static private void copyBits(long[] bits, int val, int offset, int length) { + + final int index = offset >>> 6; + final int skip = offset & 0x3f; + val &= (0xffffffff >>> (32 - length)); + bits[index] |= (((long)val) << skip); + if (64 - skip < length) { + bits[index + 1] |= ((long)val >>> (64 - skip)); + } + + } + + static private int getBitSlice(OpenBitSet compressedSet, final int offset, final int length) { + final long[] bits = compressedSet.getBits(); + final int index = offset >>> 6; + final int skip = offset & 0x3f; + int val = (int)(bits[index] >>> skip); + if (64 - skip < length) { + val |= (int)bits[index + 1] << (64 - skip); + } + return val & (0xffffffff >>> (32 - length)); + } + + static private int getBitSlice(long[] bits, final int offset, final int length) { + + final int index = offset >>> 6; + final int skip = offset & 0x3f; + int val = (int)(bits[index] >>> 
skip); + if (64 - skip < length) { + val |= (int)bits[index + 1] << (64 - skip); + } + return val & (0xffffffff >>> (32 - length)); + } + + + // Method to allow iteration in decompressed form + public final int get(long[] compressedSet, int index) { + final int retVal = getBitSlice(compressedSet, (index * _b + HEADER_MASK), _b); + + // fake the function pointer logic + return valueproc[retVal & 0x1].process(retVal >>> 1, _exceptionOffset, compressedSet); + + } + + /* Method to allow iteration in decompressed form + public int get(OpenBitSet compressedSet, int index) { + final int retVal = getBitSlice(compressedSet, (index * _b + HEADER_MASK), _b); + + // fake the function pointer logic + return valueproc[retVal & 0x1].process(retVal >>> 1, _exceptionOffset, compressedSet); + + + /*This is an exception + if (compressedSet.getBit((index + 1) * _b + HEADER_MASK - 1) == 1) { + + int exOffset = _exceptionOffset + retVal * BASE_MASK; + retVal = 0; + // Get the actual value + for (int j = 0; j < BASE_MASK; j++) + retVal |= (compressedSet.getBit(exOffset + j) << j); + return retVal; + } + else + return retVal; + }*/ + + public int[] decompress(OpenBitSet compressedSet) { + + int[] op = new int[_batchSize]; + // reuse o/p + op[0] = _base; + + // Offset of the exception list + int exceptionOffset = HEADER_MASK + _b * _batchSize; + + // explode and patch + for (int i = 1; i < _batchSize; i++) { + int val = getBitSlice(compressedSet, i * _b + HEADER_MASK, _b); + + if ((val & 0x1) != 0) { + // This is an exception + op[i] = getBitSlice(compressedSet, exceptionOffset, BASE_MASK); + exceptionOffset += BASE_MASK; + } else { + op[i] = val >>> 1; + } + op[i] += op[i - 1]; + } + return op; + } + + public int[] decompress(long[] compressedSet) { + int[] op = new int[_batchSize]; + // reuse o/p + op[0] = _base; + + // Offset of the exception list + int exceptionOffset = HEADER_MASK + _b * _batchSize; + + // explode and patch + for (int i = 1; i < _batchSize; i++) { + int val = 
getBitSlice(compressedSet, i * _b + HEADER_MASK, _b); + + if ((val & 0x1) != 0) { + // This is an exception + op[i] = getBitSlice(compressedSet, exceptionOffset, BASE_MASK); + exceptionOffset += BASE_MASK; + } else { + op[i] = val >>> 1; + } + op[i] += op[i - 1]; + } + return op; + } + + /** + * Method not supported + * + */ + public int[] decompress(BitSet compressedSet) throws IllegalArgumentException { + return null; + } + + public String printParams() { + return "b val:" + _b + " exceptionOffset:" + _exceptionOffset; + } + + + + + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AbstractDocSet.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AbstractDocSet.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AbstractDocSet.java (revision 0) @@ -0,0 +1,305 @@ +package org.apache.lucene.kamikaze.docidset.impl; + +import java.io.Serializable; +import java.util.BitSet; + +import org.apache.lucene.kamikaze.docidset.api.DocSet; +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.kamikaze.docidset.utils.LongSegmentArray; + + +public abstract class AbstractDocSet extends DocSet implements Serializable { + + private static final long serialVersionUID = 1L; + + private static final double logBase2 = Math.log(2); + + public static final int DEFAULT_BATCH_SIZE = 256; + + + + /** + * Default batch size for compression blobs + * + */ + public int BATCH_SIZE = DEFAULT_BATCH_SIZE; + + /** + * Default batch size for compression blobs + * + */ + protected int BATCH_OVER = 12; + + /** + * Current base size + * + */ + protected int current_base; + + /** + * Last added value + * + */ + protected int lastAdded = 0; + + /** + * List of Data blobs + * + + protected MyOpenBitSetArray blob = null;*/ + + + /** + * List of Data blobs + * + */ + protected LongSegmentArray blob = null; + + /** + * Pointer to 
the current data block. + * + */ + protected int[] current = null; + + /** + * Size of the current array + * + */ + protected int current_size = 0; + + /** + * Current Max bit count + * + */ + protected int current_ex_count = 0; + + /** + * Current Bit Size + * + */ + protected int current_b = 1; + + /** + * B Value accumulator + * + */ + protected int[] bVal = null; + + /** + * compressed bit size + */ + /** + * Compressed Bits + */ + protected long compressedBits; + + + + /** + * Internal compression Method + * @return compressed object + */ + protected abstract Object compress(); + // protected abstract Object compressAlt(); + + protected AbstractDocSet() { + this.blob = new LongSegmentArray(); + + } + + /** + * Internal Decompression Method + * + * @return + */ + private int[] decompress(MyOpenBitSet packedSet) { + System.err.println("Method not implemented"); + return null; + } + + /** + * Internal Decompression Method + * + * @return decompressed in the form of integer array + */ + protected int[] decompress(BitSet packedSet) { + System.err.println("Method not implemented"); + return null; + } + + private void initSet() { + this.current = new int[BATCH_SIZE]; + current_size = 0; + current_b = 32; + // blob = new ArrayList(); + bVal = new int[33]; + } + + /** + * Number of compressed units plus the last block + * @return docset size + */ + public int size() { + return blob.size() * BATCH_SIZE + current_size; + } + + + + /** + * Add document to this set + * + */ + public void addDoc(int docid) { + if (size() == 0) { + initSet(); + current[current_size++] = docid; + current_base = docid; + lastAdded = current_base; + } + + else if (current_size == BATCH_SIZE) { + current_b = 32; + current_ex_count = 0; + + int totalBitSize = current_b * BATCH_SIZE; + int exceptionCount = 0; + + // formulate b value. Minimum bits used is minB. 
+ for (int b = 32; b > 0; b--) + { + exceptionCount += bVal[b]; + + // break if exception count is too large for this b + if((getNumBits(exceptionCount) + 1) >= b) break; + + if ((exceptionCount * 32 + b * BATCH_SIZE) < totalBitSize) + { + // this is the best parameter so far + current_b = b; + current_ex_count = exceptionCount; + } + } + + long[] myop = (long[]) compress(); + compressedBits+=myop.length<<6; + blob.add(myop); + + // roll the batch + current_size = 1; + current_base = docid; + lastAdded = current_base; + current[0] = current_base; + current_ex_count = 0; + + bVal = new int[33]; + + }// end batch boundary + + else { + try { + int delta = docid - lastAdded; + current[current_size] = delta; + lastAdded = docid; + if (delta != 0) + bVal[getNumBits(delta)]++; + + current_size++; + } catch (ArrayIndexOutOfBoundsException w) { + System.err.println("Error inserting DOC:" + docid); + + } + + } // end append to end of array + + } + + /** + * Add document to this set + * + + public void addDoc(int docid) { + if (size() == 0) { + initSet(); + current[current_size++] = docid; + current_base = docid; + lastAdded = current_base; + } + + else if (current_size == BATCH_SIZE) { + + int exceptionCount = 0; + + // formulate b value. Minimum bits used is 5. 
+ for (int k = 31; k > 3; k--) { + // System.out.print(bVal[k]+":"); + exceptionCount += bVal[k]; + if (exceptionCount >= BATCH_OVER) { + current_b = k; + exceptionCount -= bVal[k]; + break; + } + } + + // Compensate for extra bit + current_b += 1; + + // set current_exception_count + current_ex_count = exceptionCount; + + MyOpenBitSet myop = (MyOpenBitSet) compress(); + compressedBits+=myop.capacity(); + blob.add(myop); + + // roll the batch + current_size = 1; + current_base = docid; + lastAdded = current_base; + current[0] = current_base; + current_ex_count = 0; + + bVal = new int[33]; + + }// end batch boundary + + else { + try { + + current[current_size] = docid - lastAdded; + lastAdded = docid; + if (current[current_size] != 0) + bVal[(int) (Math.log(current[current_size]) / logBase2) + 1]++; + + current_size++; + } catch (ArrayIndexOutOfBoundsException w) { + System.err.println("Error inserting DOC:" + docid); + + } + + } // end append to end of array + + }*/ + + private static final int[] NUMBITS = new int[256]; + static { + NUMBITS[0] = 1; + for(int i = 1; i < 256; i++) + { + int j = 7; + while(j > 0) + { + if((i & (1 << j)) != 0) break; + j--; + } + NUMBITS[i] = j + 1; + } + } + + private static int getNumBits(int v) + { + int n; + if((n = v >>> 24) > 0) return(NUMBITS[n] + 24); + if((n = v >>> 16) > 0) return(NUMBITS[n] + 16); + if((n = v >>> 8) > 0) return(NUMBITS[n] + 8); + return NUMBITS[v]; + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AndDocIdSet.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AndDocIdSet.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/AndDocIdSet.java (revision 0) @@ -0,0 +1,184 @@ +package org.apache.lucene.kamikaze.docidset.impl; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Comparator; +import java.util.List; + +import 
org.apache.log4j.Logger;
+import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+
+
+/**
+ * Intersection (logical AND) of a list of {@link DocIdSet}s.
+ * Null entries in the list are ignored; at least one non-null set is required
+ * before an iterator can be created.
+ */
+public class AndDocIdSet extends ImmutableDocSet implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  private static Logger log = Logger.getLogger(AndDocIdSet.class);
+
+  /** Orders iterators by their current doc id, largest first. */
+  public class DescDocIdSetComparator implements Comparator<StatefulDSIterator>,
+      Serializable {
+    private static final long serialVersionUID = 1L;
+
+    public int compare(StatefulDSIterator o1, StatefulDSIterator o2) {
+      // Compare explicitly: "o2.docID() - o1.docID()" overflows when one side
+      // is -1 and the other is NO_MORE_DOCS, which flips the ordering.
+      int d1 = o1.docID();
+      int d2 = o2.docID();
+      return d2 < d1 ? -1 : (d2 == d1 ? 0 : 1);
+    }
+
+  }
+
+  private List<DocIdSet> sets = null;
+  private int nonNullSize; // excludes nulls
+
+  /**
+   * @param docSets sets to intersect; may be null or contain null entries,
+   *        which are skipped
+   */
+  public AndDocIdSet(List<DocIdSet> docSets) {
+    this.sets = docSets;
+    int size = 0;
+    if (sets != null) {
+      for(DocIdSet set : sets) {
+        if(set != null) size++;
+      }
+    }
+    nonNullSize = size;
+  }
+
+  /** Iterator that only stops on doc ids present in every non-null set. */
+  class AndDocIdSetIterator extends DocIdSetIterator {
+    int lastReturn = -1;
+    private DocIdSetIterator[] iterators = null;
+
+    /**
+     * @throws IllegalArgumentException if there is no non-null set to iterate
+     */
+    AndDocIdSetIterator() throws IOException{
+      if (nonNullSize < 1)
+        throw new IllegalArgumentException("Minimum one iterator required");
+
+      iterators = new DocIdSetIterator[nonNullSize];
+      int j = 0;
+      for (DocIdSet set : sets) {
+        if (set != null) {
+          DocIdSetIterator dcit = set.iterator();
+          iterators[j++] = dcit;
+        }
+      }
+      lastReturn = (iterators.length > 0 ? -1 : DocIdSetIterator.NO_MORE_DOCS);
+    }
+
+    @Override
+    public final int docID() {
+      return lastReturn;
+    }
+
+    @Override
+    public final int nextDoc() throws IOException {
+
+      if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
+
+      return align(iterators[0].nextDoc());
+    }
+
+    @Override
+    public final int advance(int target) throws IOException {
+
+      if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
+
+      // Never move backwards; same clamp the other iterators of this package apply.
+      if (target <= lastReturn) target = lastReturn + 1;
+
+      return align(iterators[0].advance(target));
+    }
+
+    /**
+     * Advances the remaining iterators until all of them sit on the same doc id.
+     *
+     * @param target candidate doc id that iterators[0] is already positioned on
+     * @return the first doc id &gt;= target shared by all iterators, or
+     *         NO_MORE_DOCS; the value is also stored in lastReturn
+     */
+    private int align(int target) throws IOException {
+      int size = iterators.length;
+      int skip = 0; // index of the iterator that produced the current candidate
+      int i = 1;
+      while (i < size) {
+        if (i != skip) {
+          int docid = iterators[i].advance(target);
+          if (docid > target) {
+            // Candidate rejected: restart the round with the larger doc id.
+            target = docid;
+            if(i != 0) {
+              skip = i;
+              i = 0;
+              continue;
+            }
+            else
+              skip = 0;
+          }
+        }
+        i++;
+      }
+      return (lastReturn = target);
+    }
+  }
+
+  public final DocIdSetIterator iterator() throws IOException{
+    return new AndDocIdSetIterator();
+    //return new AndDocIdSetIterator2(sets);
+  }
+
+  /**
+   * Find existence in the set with index
+   *
+   * NOTE : Expensive call. Avoid. Walks the intersection from the start.
+   * @param val value to find the index for
+   * @return zero-based position of val in the intersection, -1 if absent
+   * @throws IOException if an underlying iterator fails
+   */
+  @Override
+  public final int findWithIndex(int val) throws IOException
+  {
+    DocIdSetIterator finder = new AndDocIdSetIterator();
+    int cursor = -1;
+    int docid;
+    while((docid = finder.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS)
+    {
+      ++cursor;
+      if(docid == val)
+        return cursor;
+      if(docid > val)
+        return -1;
+    }
+    return -1;
+  }
+
+  /**
+   * @param val doc id to look for
+   * @return true if val is present in every non-null set
+   * @throws IOException if an underlying iterator fails (no longer reported as "absent")
+   */
+  @Override
+  public final boolean find(int val) throws IOException{
+
+    DocIdSetIterator finder = new AndDocIdSetIterator();
+
+    int docid = finder.advance(val);
+    return docid != DocIdSetIterator.NO_MORE_DOCS && docid == val;
+  }
+
+
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableDocSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableDocSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableDocSet.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.lucene.kamikaze.docidset.api.DocSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+public abstract class ImmutableDocSet extends DocSet
+{
+  private static final long serialVersionUID = 1L;
+
+  private int size = -1;
+  private Logger log = Logger.getLogger(ImmutableDocSet.class.getName());
+
+  @Override
+  public void addDoc(int docid)
+  {
+    throw new java.lang.UnsupportedOperationException("Attempt to add document to an immutable data structure");
+
+  }
+
+
+  @Override
+  public int size() throws IOException
+  {
+    // Do the size if we haven't done it so far.
+    if(size < 0)
+    {
+      DocIdSetIterator dcit = this.iterator();
+      size = 0;
+      try {
+        while(dcit.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
+          size++;
+      } catch (IOException e) {
+        log.log(Level.SEVERE, "Error computing size..");
+        return -1;
+      }
+    }
+    return size;
+  }
+
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableIntArrayDocIdSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableIntArrayDocIdSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/ImmutableIntArrayDocIdSet.java (revision 0)
@@ -0,0 +1,74 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import org.apache.lucene.kamikaze.docidset.utils.IntArray;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/**
+ * Read-only {@link DocIdSet} backed directly by an int array.
+ * The array is not copied; the iterator's binary search assumes it is sorted
+ * in ascending order.
+ */
+public class ImmutableIntArrayDocIdSet extends DocIdSet {
+  private final int[] _array;
+
+  /**
+   * @param array doc ids in ascending order; used as-is, not copied
+   */
+  public ImmutableIntArrayDocIdSet(int[] array){
+    _array = array;
+  }
+
+  @Override
+  public DocIdSetIterator iterator() {
+    return new ImmutableIntArrayDocIdSetIterator(_array);
+  }
+
+  @Override
+  public final boolean isCacheable() {
+    return true;
+  }
+
+  /** Iterator over the backing array; cursor is the index of the current doc. */
+  public static class ImmutableIntArrayDocIdSetIterator extends DocIdSetIterator{
+    private int _doc;
+    private int cursor;
+    private final int[] _array;
+
+    public ImmutableIntArrayDocIdSetIterator(int[] array){
+      _array=array;
+      _doc = -1;
+      cursor=-1;
+    }
+
+    @Override
+    final public int docID(){
+      return _doc;
+    }
+
+    @Override
+    public int nextDoc() throws java.io.IOException{
+      if (++cursor < _array.length) {
+        _doc = _array[cursor];
+      }
+      else{
+        _doc = DocIdSetIterator.NO_MORE_DOCS;
+      }
+      return _doc;
+    }
+
+    /**
+     * Moves to the first doc id &gt;= target that lies after the current position.
+     *
+     * @return that doc id, or NO_MORE_DOCS when the array is exhausted
+     */
+    @Override
+    public int advance(int target) throws java.io.IOException{
+      if (cursor >= _array.length) return (_doc = DocIdSetIterator.NO_MORE_DOCS);
+      if (target <= _doc) target = _doc + 1;
+      // Everything up to cursor is < target, so search strictly after it. This
+      // also keeps the start index non-negative on a fresh iterator (cursor == -1).
+      // NOTE(review): relies on IntArray.binarySearch following the
+      // java.util.Arrays convention of returning -(insertionPoint + 1) on a
+      // miss, as the original code already assumed - confirm.
+      int index = IntArray.binarySearch(_array, cursor + 1, _array.length, target);
+      if (index < 0) {
+        index = -(index + 1); // insertion point = first element > target
+      }
+      if (index >= _array.length) {
+        cursor = _array.length;
+        _doc = DocIdSetIterator.NO_MORE_DOCS;
+      }
+      else {
+        cursor = index; // index 0 is a legitimate hit
+        _doc = _array[cursor];
+      }
+      return _doc;
+    }
+  }
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/IntArrayDocIdSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/IntArrayDocIdSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/IntArrayDocIdSet.java (revision 0)
@@ -0,0 +1,142 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.lucene.kamikaze.docidset.api.DocSet;
+import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator;
+import org.apache.lucene.kamikaze.docidset.utils.IntArray;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+public class IntArrayDocIdSet extends DocSet implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private IntArray array = null;
+
+  private int pos = -1;
+
+  public IntArrayDocIdSet(int length) {
+    array = new IntArray(length);
+  }
+
+  public IntArrayDocIdSet() {
+    array = new IntArray();
+  }
+
+  public void addDoc(int docid) {
+    ++pos;
+    array.add(docid);
+  }
+
+  @Override
+  public final boolean isCacheable() {
+    return true;
+  }
+
+  protected int binarySearchForNearest(int val, int begin, int end) {
+
+    int mid = (begin + end) / 2;
+    int midval = array.get(mid);
+
+    if(mid == end)
+      return midval>=val? mid : -1;
+
+    if (midval < val) {
+      // Find number equal or greater than the target.
+      if (array.get(mid + 1) >= val) return mid + 1;
+
+      return binarySearchForNearest(val, mid + 1, end);
+    }
+    else {
+      // Find number equal or greater than the target.
+      if (midval == val) return mid;
+
+      return binarySearchForNearest(val, begin, mid);
+    }
+  }
+
+  /**
+   * Iterator over the ids added so far. cursor is the index in the backing
+   * array of the doc last returned; pos (outer field) is the index of the
+   * last added doc.
+   */
+  class IntArrayDocIdSetIterator extends StatefulDSIterator {
+    int lastReturn = -1;
+
+    int cursor = -1;
+
+    public IntArrayDocIdSetIterator()
+    {
+      // pos == -1 means nothing has been added: start out exhausted.
+      if(pos == -1) lastReturn = DocIdSetIterator.NO_MORE_DOCS;
+    }
+
+    @Override
+    public int docID() {
+      return lastReturn;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (cursor < pos) {
+        return (lastReturn = array.get(++cursor));
+      }
+      return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
+    }
+
+    /**
+     * Moves to the first doc id &gt;= target after the current position, or
+     * returns NO_MORE_DOCS.
+     */
+    @Override
+    public int advance(int target) throws IOException {
+      if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
+
+      if (target <= lastReturn) target = lastReturn + 1;
+
+      // Upper bound of the search window: at most (target - lastReturn)
+      // slots ahead of cursor, capped at the last filled slot.
+      // NOTE(review): this bound presumes strictly increasing ids - confirm.
+      int end = Math.min(cursor + (target - lastReturn), pos);
+      int index = binarySearchForNearest(target, cursor + 1, end);
+
+      if (index == -1) {
+        cursor = pos;
+        return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
+      } else {
+        cursor = index;
+        return (lastReturn = array.get(cursor));
+      }
+    }
+
+    @Override
+    public int getCursor() {
+      return cursor;
+    }
+  }
+
+  @Override
+  public IntArrayDocIdSetIterator iterator() {
+    return new IntArrayDocIdSetIterator();
+  }
+
+  /** Number of ids added so far. */
+  public int size() {
+    return pos + 1;
+  }
+  /**
+   * @return index of val in the backing array, or -1 if val is not present
+   */
+  @Override
+  public int findWithIndex(int val) {
+    IntArrayDocIdSetIterator dcit = new IntArrayDocIdSetIterator();
+    try {
+      int docid = dcit.advance(val);
+      if (docid == val)
+        return dcit.getCursor();
+    } catch (IOException e) {
+      // Only reached if advance() fails; the failure is printed and reported as "not found".
+      e.printStackTrace();
+    }
+    return -1;
+  }
+
+  @Override
+  public long sizeInBytes()
+  {
+    //Object Overhead
+    return array.length()*4 + 64;
+  }
+
+  @Override
+  public void optimize()
+  {
+    this.array.seal();
+  }
+
+
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/NotDocIdSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/NotDocIdSet.java (revision 0)
+++ 
contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/NotDocIdSet.java (revision 0)
@@ -0,0 +1,117 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/**
+ * Complement of a {@link DocIdSet} within the range [0, max): iterates every
+ * doc id below max that the wrapped set does not contain.
+ */
+public class NotDocIdSet extends ImmutableDocSet implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private DocIdSet innerSet = null;
+
+  private int max = -1;
+
+  /**
+   * @param docSet set to negate
+   * @param maxVal exclusive upper bound of the doc ids to produce
+   */
+  public NotDocIdSet(DocIdSet docSet, int maxVal) {
+    innerSet = docSet;
+    max = maxVal;
+  }
+
+  class NotDocIdSetIterator extends DocIdSetIterator {
+    int lastReturn = -1;
+    // Iterator over the wrapped set; null once it has no more docs.
+    private DocIdSetIterator it1 = null;
+    // Current doc id of it1 (only meaningful while it1 != null).
+    private int innerDocid = -1;
+
+    NotDocIdSetIterator() throws IOException{
+      initialize();
+    }
+
+    /** Positions the inner iterator on its first doc. */
+    private void initialize() throws IOException{
+      it1 = innerSet.iterator();
+
+      // Let IOException propagate: swallowing it left the iterator half
+      // initialised and silently produced wrong results. A null inner
+      // iterator is tolerated and treated as an empty inner set.
+      if (it1 != null && (innerDocid = it1.nextDoc()) == DocIdSetIterator.NO_MORE_DOCS) it1 = null;
+    }
+
+    @Override
+    public int docID() {
+      return lastReturn;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      // advance() clamps the target to lastReturn + 1, so 0 means "next".
+      return advance(0);
+    }
+
+    /**
+     * Moves to the first doc id &gt;= target that is below max and not in the
+     * inner set.
+     *
+     * @return that doc id, or NO_MORE_DOCS once max is reached
+     */
+    @Override
+    public int advance(int target) throws IOException {
+
+      if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) {
+        return DocIdSetIterator.NO_MORE_DOCS;
+      }
+
+      if (target <= lastReturn) target = lastReturn + 1;
+
+      // Enforce the upper bound up front. Previously it was only checked while
+      // skipping inner docs, so the iterator ran past max (and never ended)
+      // whenever target itself was not in the inner set.
+      if (target >= max) {
+        return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
+      }
+
+      if (it1 != null && innerDocid < target) {
+        if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
+          it1 = null;
+        }
+      }
+
+      // Skip over the run of consecutive ids that the inner set contains.
+      while (it1 != null && innerDocid == target) {
+        target++;
+        if (target >= max) {
+          return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
+        }
+        if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
+          it1 = null;
+        }
+      }
+      return (lastReturn = target);
+    }
+  }
+
+  @Override
+  public DocIdSetIterator iterator() throws IOException{
+    return new NotDocIdSetIterator();
+  }
+
+  /**
+   * Find existence in the set 
with index
+   *
+   * NOTE : Expensive call. Avoid.
+   * @param val value to find the index for
+   * @return index if the given value
+   */
+  @Override
+  public int findWithIndex(int val) throws IOException
+  {
+    DocIdSetIterator finder = new NotDocIdSetIterator();
+    int cursor = -1;
+    try {
+      int docid;
+      while((docid = finder.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+      {
+        if(docid > val)
+          return -1;
+        else if(docid == val )
+          return ++cursor;
+        else
+          ++cursor;
+
+
+      }
+    } catch (IOException e) {
+      return -1;
+    }
+    return -1;
+  }
+
+
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OBSDocIdSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OBSDocIdSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OBSDocIdSet.java (revision 0)
@@ -0,0 +1,166 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.lucene.kamikaze.docidset.api.DocSet;
+import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator;
+import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/**
+ * Doc id set stored as a bit set of offsets from the first id added:
+ * bit (docid - min) is set for every added docid.
+ */
+public class OBSDocIdSet extends DocSet implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private MyOpenBitSet bitSet = null;
+
+  // First doc id added (-1 while empty); bit offsets are relative to it.
+  int min = -1;
+
+  // Last doc id added (-1 while empty).
+  int max = -1;
+
+  public OBSDocIdSet(int length) {
+    bitSet = new MyOpenBitSet(length);
+  }
+
+  /**
+   * Adds a doc id. The first id added becomes the base (min) for all offsets.
+   */
+  public void addDoc(int docid) {
+    if (min == -1) {
+      min = docid;
+    }
+    max = docid;
+    bitSet.set(max - min);
+
+  }
+
+  @Override
+  public final boolean isCacheable() {
+    return true;
+  }
+
+  class OBSDocIdSetIterator extends StatefulDSIterator {
+    // Bit offset (docid - min) of the current doc, -1 before the first one.
+    int lastReturn = -1;
+    // Number of set bits up to marker, minus one; brought up to date lazily by getCursor().
+    private int cursor = -1;
+    private int marker=-1;
+    // Set once the iterator has run off the end. Without it a failed
+    // nextSetBit() reset lastReturn to -1 and the iteration silently restarted.
+    private boolean exhausted = false;
+
+    /**
+     * @return -1 before the first doc, NO_MORE_DOCS after the last one,
+     *         otherwise the current doc id
+     */
+    @Override
+    public int docID() {
+      if (exhausted) return DocIdSetIterator.NO_MORE_DOCS;
+      return lastReturn == -1 ? -1 : lastReturn + min;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+
+      if (!exhausted && bitSet.size() - 1 > lastReturn) {
+        int next;
+        if (lastReturn == -1) {
+          // Fresh iterator: only bit 0 is probed, as in the original code.
+          next = bitSet.fastGet(0) ? 0 : -1;
+        } else
+          next = bitSet.nextSetBit(lastReturn + 1);
+
+        if (next != -1)
+        {
+          lastReturn = next;
+          cursor++; marker = lastReturn;
+          return lastReturn + min;
+        }
+      }
+      exhausted = true;
+      return DocIdSetIterator.NO_MORE_DOCS;
+
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (exhausted)
+        return DocIdSetIterator.NO_MORE_DOCS;
+      if (target > max) {
+        exhausted = true;
+        return DocIdSetIterator.NO_MORE_DOCS;
+      }
+
+      target -= min; // adjust target to the local offset
+      if (target <= lastReturn) target = lastReturn + 1;
+
+      int next;
+      if(target <= 0) {
+        next = bitSet.fastGet(0) ? 0 : -1;
+      }
+      else {
+        next = bitSet.nextSetBit(target);
+      }
+      if (next != -1) {
+        // cursor/marker are caught up lazily in getCursor().
+        lastReturn = next;
+        return lastReturn + min;
+      }
+      exhausted = true;
+      return DocIdSetIterator.NO_MORE_DOCS;
+    }
+
+    /**
+     * @return zero-based rank of the current doc among the set bits
+     */
+    @Override
+    public int getCursor() {
+
+      // advance() may have jumped over set bits; count the ones skipped.
+      while(marker < lastReturn)
+      {
+        if(bitSet.fastGet(++marker))
+        {
+          cursor++;
+        }
+      }
+
+      return cursor;
+    }
+  }
+
+  @Override
+  public OBSDocIdSetIterator iterator() {
+    return new OBSDocIdSetIterator();
+  }
+
+  /** Distance between the last and the first doc id added. */
+  public int range() {
+    return max - min;
+  }
+
+  public int size() {
+    return (int) this.bitSet.cardinality();
+  }
+
+  /**
+   * @return zero-based rank of val among the stored ids, or -1 if absent
+   */
+  public int findWithIndex(int val) {
+
+    val -= min;
+    if (val >=0 && bitSet.get(val)) {
+      int index = -1;
+      int counter = -1;
+      // Count the set bits up to and including val.
+      while(true)
+      {
+        index = this.bitSet.nextSetBit(index+1);
+        if(index<=val && index!=-1)
+          counter++;
+        else
+          break;
+      }
+      return counter;
+
+    } else
+      return -1;
+
+  }
+  @Override
+  public boolean find(int val) {
+
+    val -= min;
+    if (val >=0 && bitSet.get(val)) {
+      return true;
+    } else
+      return false;
+
+  }
+
+  @Override
+  public long sizeInBytes()
+  {
+    return bitSet.capacity()/8;
+  }
+
+  @Override
+  public void optimize()
+  {
+    this.bitSet.trimTrailingZeros();
+  }
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSet.java
===================================================================
--- 
contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSet.java (revision 0)
@@ -0,0 +1,112 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/**
+ * Union (logical OR) of a list of {@link DocIdSet}s, evaluated lazily by
+ * {@link OrDocIdSetIterator}.
+ */
+public class OrDocIdSet extends ImmutableDocSet implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  private static final int INVALID = -1;
+
+  private static Logger log = Logger.getLogger(OrDocIdSet.class);
+
+  /** Orders iterators by their current doc id, smallest first. */
+  public class AescDocIdSetComparator implements Comparator<DocIdSetIterator>,
+      Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
+      // Compare explicitly: subtracting doc ids overflows when one side is -1
+      // and the other is NO_MORE_DOCS.
+      int d1 = o1.docID();
+      int d2 = o2.docID();
+      return d1 < d2 ? -1 : (d1 == d2 ? 0 : 1);
+    }
+
+  }
+
+  List<DocIdSet> sets = null;
+
+  // Cached result of size(); INVALID until it has been computed successfully.
+  private int _size = INVALID;
+
+
+
+  /**
+   * @param docSets sets to union; may be null or contain null entries
+   */
+  public OrDocIdSet(List<DocIdSet> docSets) {
+    // The original also counted the non-null entries here, but the count was
+    // never stored or used.
+    this.sets = docSets;
+  }
+
+  @Override
+  public DocIdSetIterator iterator() throws IOException{
+    return new OrDocIdSetIterator(sets);
+    /*
+    List list = new ArrayList(sets.size());
+    for (DocIdSet set : sets)
+    {
+      list.add(set.iterator());
+    }
+    return new DisjunctionDISI(list);
+    */
+  }
+
+
+  /**
+   * Find existence in the set with index
+   *
+   * NOTE : Expensive call. Avoid. Walks the union from the start.
+   * @param val value to find the index for
+   * @return zero-based position of val in the union, -1 if absent
+   * @throws IOException if an underlying iterator fails
+   */
+  @Override
+  public int findWithIndex(int val) throws IOException
+  {
+    DocIdSetIterator finder = new OrDocIdSetIterator(sets);
+    int cursor = -1;
+    int docid;
+    while((docid = finder.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+    {
+      ++cursor;
+      if(docid == val)
+        return cursor;
+      if(docid > val)
+        return -1;
+    }
+    return -1;
+  }
+
+  /**
+   * @return number of distinct doc ids in the union; computed on first use and cached
+   * @throws IOException if an underlying iterator fails; nothing is cached in that case
+   */
+  @Override
+  public int size() throws IOException
+  {
+
+    if(_size==INVALID)
+    {
+      // Count into a local so that a failure cannot leave 0 or a partial
+      // count behind in the cache.
+      int count = 0;
+      DocIdSetIterator it = this.iterator();
+
+      while(it.nextDoc()!=DocIdSetIterator.NO_MORE_DOCS)
+        count++;
+
+      _size = count;
+    }
+    return _size;
+  }
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSetIterator.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSetIterator.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/OrDocIdSetIterator.java (revision 0)
@@ -0,0 +1,165 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class OrDocIdSetIterator extends DocIdSetIterator {
+
+  /** Heap entry: a sub-iterator together with its current doc id. */
+  private final class Item
+  {
+    public final DocIdSetIterator iter;
+    public int doc;
+    public Item(DocIdSetIterator iter)
+    {
+      this.iter = iter;
+      this.doc = -1;
+    }
+  }
+  private int _curDoc;
+  private final Item[] _heap;
+  private int _size;
+
+  /**
+   * @param sets sets to merge; a null list, null entries and sets whose
+   *        iterator is null are skipped (OrDocIdSet accepts such input, so
+   *        failing here with a NullPointerException was inconsistent)
+   */
+  OrDocIdSetIterator(List<DocIdSet> sets) throws IOException
+  {
+    _curDoc = -1;
+    _heap = new Item[sets == null ? 0 : sets.size()];
+    _size = 0;
+    if (sets != null)
+    {
+      for(DocIdSet set : sets)
+      {
+        if (set == null) continue;
+        DocIdSetIterator iter = set.iterator();
+        if (iter != null) _heap[_size++] = new Item(iter);
+      }
+    }
+    if(_size == 0) _curDoc = DocIdSetIterator.NO_MORE_DOCS;
+  }
+
+  @Override
+  public final int 
docID() {
+    return _curDoc;
+  }
+
+  /**
+   * Steps the top sub-iterator (and every other one still sitting on the
+   * current doc) until the heap top carries a doc id greater than the current one.
+   */
+  @Override
+  public final int nextDoc() throws IOException
+  {
+    if(_curDoc == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
+
+    Item top = _heap[0];
+    while(true)
+    {
+      DocIdSetIterator topIter = top.iter;
+      int docid;
+      if((docid = topIter.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS)
+      {
+        top.doc = docid;
+        heapAdjust();
+      }
+      else
+      {
+        // Sub-iterator exhausted: drop it from the heap.
+        heapRemoveRoot();
+        if(_size == 0) return (_curDoc = DocIdSetIterator.NO_MORE_DOCS);
+      }
+      top = _heap[0];
+      int topDoc = top.doc;
+      if(topDoc > _curDoc)
+      {
+        return (_curDoc = topDoc);
+      }
+    }
+  }
+
+  /**
+   * Advances sub-iterators from the heap top until the smallest current doc
+   * id is &gt;= target (target is first clamped to be past the current doc).
+   */
+  @Override
+  public final int advance(int target) throws IOException
+  {
+    if(_curDoc == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
+
+    if(target <= _curDoc) target = _curDoc + 1;
+
+    Item top = _heap[0];
+    while(true)
+    {
+      DocIdSetIterator topIter = top.iter;
+      int docid;
+      if((docid = topIter.advance(target))!=DocIdSetIterator.NO_MORE_DOCS)
+      {
+        top.doc = docid;
+        heapAdjust();
+      }
+      else
+      {
+        // Sub-iterator exhausted: drop it from the heap.
+        heapRemoveRoot();
+        if (_size == 0) return (_curDoc = DocIdSetIterator.NO_MORE_DOCS);
+      }
+      top = _heap[0];
+      int topDoc = top.doc;
+      if(topDoc >= target)
+      {
+        return (_curDoc = topDoc);
+      }
+    }
+  }
+
+// Organize subScorers into a min heap with scorers generating the earliest document on top.
+  /*
+  private final void heapify() {
+    int size = _size;
+    for (int i=(size>>1)-1; i>=0; i--)
+      heapAdjust(i);
+  }
+  */
+  /* The subtree of subScorers at root is a min heap except possibly for its root element.
+   * Bubble the root down as required to make the subtree a heap.
+   */
+  private final void heapAdjust()
+  {
+    final Item[] heap = _heap;
+    final Item top = heap[0];
+    final int doc = top.doc;
+    final int size = _size;
+    int i = 0;
+
+    while(true)
+    {
+      int lchild = (i<<1)+1;
+      if(lchild >= size) break;
+
+      Item left = heap[lchild];
+      int ldoc = left.doc;
+
+      int rchild = lchild+1;
+      if(rchild < size){
+        Item right = heap[rchild];
+        int rdoc = right.doc;
+
+        // Right child is the smaller one (or ties): compare against it.
+        if(rdoc <= ldoc)
+        {
+          if(doc <= rdoc) break;
+
+          heap[i] = right;
+          i = rchild;
+          continue;
+        }
+      }
+
+      if(doc <= ldoc) break;
+
+      heap[i] = left;
+      i = lchild;
+    }
+    // Drop the former root into the slot where it stopped sinking.
+    heap[i] = top;
+  }
+
+  // Remove the root Scorer from subScorers and re-establish it as a heap
+  private final void heapRemoveRoot()
+  {
+    _size--;
+    if (_size > 0)
+    {
+      Item tmp = _heap[0];
+      _heap[0] = _heap[_size];
+      _heap[_size] = tmp; // keep the finished iterator at the end for debugging
+      heapAdjust();
+    }
+  }
+
+}
Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/P4DDocIdSet.java
===================================================================
--- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/P4DDocIdSet.java (revision 0)
+++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/impl/P4DDocIdSet.java (revision 0)
@@ -0,0 +1,582 @@
+package org.apache.lucene.kamikaze.docidset.impl;
+
+import java.io.Serializable;
+
+import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator;
+import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet;
+import org.apache.lucene.kamikaze.docidset.compression.P4DSetNoBase;
+import org.apache.lucene.kamikaze.docidset.utils.IntArray;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/**
+ * Doc id set wrapper around P4DSet
+ *
+ *
+ * @author abhasin
+ *
+ */
+public class P4DDocIdSet extends AbstractDocSet implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private static final int DEFAULT_B = 5;
+  /**
+   * Utility object for compression.
+ */ + private P4DSetNoBase compressedSet = new P4DSetNoBase(); + + /** + * List for the base integer values of the compressed batches. + */ + private IntArray baseList = null; + + + + public P4DDocIdSet() { + + baseList = new IntArray(); + compressedBits = 0; + + } + + public P4DDocIdSet(int batchSize) { + this(); + this.BATCH_SIZE = batchSize; + this.BATCH_OVER = batchSize / 20; + + } + + @Override + public final boolean isCacheable() { + return true; + } + + @Override + protected Object compress() { + current[0] = 0; + compressedSet.setParam(current_base, current_b, BATCH_SIZE, + current_ex_count); + baseList.add(current_base); + return compressedSet.compressAlt(current); + } + + /** + * Method to decompress the entire batch + * + * @param blob MyOpenBitSet + * @return int array with decompressed segment of numbers + */ + protected int[] decompress(MyOpenBitSet blob) { + return new P4DSetNoBase().decompress(blob); + } + + /** + * Binary search + * + * @param val + * @param begin + * @param end + * @return index greater than or equal to the target. -1 if the target is out + * of range. 
+ */ + protected int binarySearchForNearest(int val, int begin, int end) { + int mid = (begin + end) / 2; + if (mid == end || (baseList.get(mid) <= val && baseList.get(mid + 1) > val)) + return mid; + else if (baseList.get(mid) < val) + return binarySearchForNearest(val, mid + 1, end); + else + return binarySearchForNearest(val, begin, mid); + } + + protected int binarySearchForNearestAlt(int val, int begin, int end) + { + while(true) + { + + int mid = (begin+end)/2; + + if(mid==end || (baseList.get(mid) <= val && baseList.get(mid + 1) > val)) + return mid; + + else if(baseList.get(mid) < val) + { + begin = mid+1; + + } + else + { + end=mid; + } + } + + } + + class P4DDocIdSetIterator extends StatefulDSIterator implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Address bits + * + */ + int ADDRESS_BITS = (int) (Math.log(BATCH_SIZE) / Math.log(2)); + + /** + * retaining Offset from the list of blobs from the iterator pov + * + */ + int cursor = -1; + + /** + * Current iterating batch index. + * + */ + int bi = -1; + + /** + * Current iterating offset. 
+ * + */ + int offset = 0; + + /** + * doc() returned + * + */ + int lastReturn = -1; + + /** + * size of the set + * + */ + int size = size(); + + /** + * Reference to the blob iterating + * + */ + long[] ref = null; + + /** + * Reference to the blob iterating + * + */ + int blobSize = blob.size(); + + + P4DSetNoBase localCompressedSet = new P4DSetNoBase(); + + + + P4DDocIdSetIterator() { + super(); + localCompressedSet.setParam(0, DEFAULT_B, BATCH_SIZE, BATCH_OVER); + } + + @Override + public int docID() { + return lastReturn; + } + + /** + * Method to allow iteration in decompressed form + + public int get(OpenBitSet set, int index) { + return compressedSet.get(set, index); + }*/ + + /** + * Method to allow iteration in decompressed form + */ + public int get(long[] set, int index) { + return localCompressedSet.get(set, index); + } + + @Override + public int nextDoc() { + // increment the cursor and check if it falls in the range for the + // number of batches, if not return false else, its within range + if (++cursor < size) { + + // We are already in the array + if (bi == blobSize) { + if (offset == -1) { + lastReturn = DocIdSetIterator.NO_MORE_DOCS; + return DocIdSetIterator.NO_MORE_DOCS; + } else + lastReturn += current[offset++]; + } + // if we are not in the array but on the boundary of a batch + // update local blob and set params + else if (offset == 0) { + + bi = batchIndex(cursor); + + if (bi < blobSize) { + lastReturn = baseList.get(bi); + ref = blob.get(bi); + localCompressedSet.updateParams(ref); + offset++;// cursor - (bi << ADDRESS_BITS);+1 + } else { + // offset = 0;//cursor - (bi << ADDRESS_BITS); + lastReturn = current[offset++]; + } + } else { + + lastReturn += localCompressedSet.get(ref, offset); + offset = (++offset) % BATCH_SIZE; + } + return lastReturn; + + } + lastReturn = DocIdSetIterator.NO_MORE_DOCS; + return DocIdSetIterator.NO_MORE_DOCS; + + } + + /** + * Get the index of the batch this cursor position falls into + * + * @param index 
+ * @return + */ + private int batchIndex(int index) { + return index >> ADDRESS_BITS; + } + + /** + * Next need be called after skipping. + * + */ + @Override + public int advance(int target) { + + if (target <= lastReturn) target = lastReturn + 1; + + // NOTE : Update lastReturn. + + if (bi == blobSize || (bi + 1 < blobSize && target < baseList.get(bi + 1))) { + while (nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (lastReturn >= target) return lastReturn; + } + lastReturn = DocIdSetIterator.NO_MORE_DOCS; + return DocIdSetIterator.NO_MORE_DOCS; + } + + // If the target is outside the compressed space + if (blobSize == 0 || target >= current[0]) { + + bi = blobSize; + ref = null; + + offset = findAndUpdate(current, target); + + if (offset > 0) { + cursor = blobSize * BATCH_SIZE + offset - 1; + return lastReturn; + } + // We have gone over the batch boundary + else if (offset == 0) { + cursor = (blobSize + 1) * BATCH_SIZE; + return lastReturn; + } + + lastReturn = DocIdSetIterator.NO_MORE_DOCS; + return DocIdSetIterator.NO_MORE_DOCS; + } + + + // This returns the blob where base value is less than the value looked + // for. + int index = binarySearchForNearest(target, bi, blobSize - 1); + // Move both these further, as we are in this block, so that + // doc() call works. + bi = index; + lastReturn = baseList.get(index); + ref = blob.get(index); + localCompressedSet.updateParams(ref); + + // find the nearest integer in the compressed space. + offset = findAndUpdate(ref, target, lastReturn); + + if (offset < 0) { + // oops we fell into the gap. This case happens when we land + // in the gap between two batches. We can optimize this + // step. 
+ if (++index < blobSize) { + lastReturn = baseList.get(index); + ref = blob.get(index); + localCompressedSet.updateParams(ref); + } + else { + lastReturn = current[0]; + ref = null; + } + bi = index; + offset = 1; + } + + cursor = bi * BATCH_SIZE + offset - 1; + + return lastReturn; + } + + /* private void printSet(MyOpenBitSet test, int base) { + try { + int localBase = base; + for (int i = 1; i < BATCH_SIZE; i++) { + localBase += compressedSet.get(test, i); + System.out.print(localBase + ","); + } + } catch (Exception e) { + e.printStackTrace(); + int localBase = base; + int testint[] = compressedSet.decompress(test); + for (int i = 1; i < BATCH_SIZE; i++) { + localBase += testint[i]; + System.out.print(localBase + ","); + } + } + + }*/ + + private void printSet(long[] test, int base) { + try { + int localBase = base; + for (int i = 1; i < BATCH_SIZE; i++) { + localBase += localCompressedSet.get(test, i); + System.out.print(localBase + ","); + } + } catch (Exception e) { + e.printStackTrace(); + int localBase = base; + int testint[] = localCompressedSet.decompress(test); + for (int i = 1; i < BATCH_SIZE; i++) { + localBase += testint[i]; + System.out.print(localBase + ","); + } + } + + } + + /** + * Find the element in the compressed set + * + * @param next + * @param target + * @param base + * @return + */ + private int findAndUpdate(long[] next, int target, int base) { + lastReturn = base; + if (lastReturn >= target) + return 1; + + for (int i = 1; i < BATCH_SIZE; i++) { + // System.out.println("Getting "+i); + // System.out.flush(); + + lastReturn += localCompressedSet.get(next, i); + if (lastReturn >= target) { + // if(i==127) + return (i + 1) % BATCH_SIZE; + } + } + return -1; + } + + /** + * Find the element in the compressed set + * + * @param next + * @param target + * @param base + * @return + + private int findAndUpdate(MyOpenBitSet next, int target, int base) { + lastReturn = base; + if (lastReturn >= target) + return 1; + + for (int i = 1; i < 
BATCH_SIZE; i++) { + // System.out.println("Getting "+i); + // System.out.flush(); + + lastReturn += compressedSet.get(next, i); + if (lastReturn >= target) { + // if(i==127) + return (i + 1) % BATCH_SIZE; + } + } + return -1; + }*/ + + /** + * Find the element in the set and update parameters. + * + */ + private int findAndUpdate(int[] array, int target) { + + if(array==null) + return -1; + + lastReturn = array[0]; + if (lastReturn >= target) + return 1; + + for (int i = 1; i < current_size; i++) { + lastReturn += array[i]; + + if (lastReturn >= target) + return (i + 1) % BATCH_SIZE; + } + return -1; + + } + + public int getCursor() { + return cursor; + } + + } + + public P4DDocIdSetIterator iterator() { + return new P4DDocIdSetIterator(); + } + + @Override + public int findWithIndex(int val) { + + P4DDocIdSetIterator dcit = new P4DDocIdSetIterator(); + + int docid = dcit.advance(val); + if (docid == val) + return dcit.getCursor(); + return -1; + } + + @Override + public boolean find(int val) + { + + long time = System.nanoTime(); + int local = 0; + + if(size()==0) + return false; + //Short Circuit case where its not in the set at all + if(val>lastAdded||val=current_base) + { + + int i=0; + for( i=0;ival) + break; + } + + if(i==current_size) + return local == val; + else + return (local-current[i] == val); + } + + // We are in the compressed space + else + { + + if(baseList.size() == 0) + return false; + + int blobIndex = binarySearchForNearest(val, 0, blob.size() - 1 ); + + local = baseList.get(blobIndex); + long[] ref = blob.get(blobIndex); + P4DSetNoBase localCompressedSet = new P4DSetNoBase(); + localCompressedSet.setParam(0, DEFAULT_B, BATCH_SIZE, BATCH_OVER); + localCompressedSet.updateParams(ref); + + int i = 0; + + for(i=0;ival) + { + break; + } + + } + if(i==BATCH_SIZE) + return local == val; + else + return (local-localCompressedSet.get(ref,i))==val; + } + + + } + + private int findIn(MyOpenBitSet myOpenBitSet, int baseVal, int val) { + return -1; + } + 
+ private int findIn(int[] current, Integer baseVal, int val) { + int local = baseVal; + for (int i = 1; i < BATCH_SIZE; i++) { + local += current[i]; + + if (val > local) { + if (local == val) + return i; + } else + return -1; + + } + return -1; + } + + @Override + public void optimize() + { + //Trim the baselist to size + this.baseList.seal(); + this.blob.seal(); + } + + + @Override + public long sizeInBytes() + { + // 64 is the overhead for an int array + // blobsize * numberofelements * 1.1 (Object Overhead) + // batch_size * 4 + int array overhead + // P4dDocIdSet Overhead 110 + optimize(); + return (long) (baseList.length()*4 + 64 +blob.length()*BATCH_SIZE*1.1 + BATCH_SIZE*4 + 24 + 110); + + } + + public int totalBlobSize() + { + int total = 0; + for(int i = blob.length() - 1; i >= 0; i--) + { + long[] segment = blob.get(i); + total += segment.length; + } + return total; + } + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DisiDocQueue.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DisiDocQueue.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DisiDocQueue.java (revision 0) @@ -0,0 +1,207 @@ +package org.apache.lucene.kamikaze.docidset.utils; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Derived from org.apache.lucene.util.ScorerDocQueue of July 2008 */ + +import java.io.IOException; +import org.apache.lucene.search.DocIdSetIterator; + +/** A DisiDocQueue maintains a partial ordering of its DocIdSetIterators such that the + * least DocIdSetIterator (disi) can always be found in constant time. + * Put()'s and pop()'s require log(size) time. + * The ordering is by DocIdSetIterator.doc(). + */ +public final class DisiDocQueue { + private final HeapedDisiDoc[] heap; + private final int maxSize; + private int size; + + private static final class HeapedDisiDoc { + DocIdSetIterator disi; + int doc; + + HeapedDisiDoc(DocIdSetIterator disi) { this(disi, disi.doc()); } + + HeapedDisiDoc(DocIdSetIterator disi, int doc) { + this.disi = disi; + this.doc = doc; + } + + final void adjust() { doc = disi.doc(); } + } + + private HeapedDisiDoc topHDD; // same as heap[1], only for speed + + /** Create a DisiDocQueue with a maximum size. */ + public DisiDocQueue(int maxSize) { + // assert maxSize >= 0; + size = 0; + int heapSize = maxSize + 1; + heap = new HeapedDisiDoc[heapSize]; + this.maxSize = maxSize; + topHDD = heap[1]; // initially null + } + + /** + * Adds a Scorer to a ScorerDocQueue in log(size) time. + * If one tries to add more Scorers than maxSize + * a RuntimeException (ArrayIndexOutOfBound) is thrown. 
+ */ + public final void put(DocIdSetIterator disi) { + size++; + heap[size] = new HeapedDisiDoc(disi); + upHeap(); + } + + /** + * Adds a DocIdSetIterator to the DisiDocQueue in log(size) time if either + * the DisiDocQueue is not full, or not lessThan(disi, top()). + * @param disi + * @return true if DocIdSetIterator is added, false otherwise. + */ + public final boolean insert(DocIdSetIterator disi){ + if (size < maxSize) { + put(disi); + return true; + } else { + int docNr = disi.doc(); + if ((size > 0) && (! (docNr < topHDD.doc))) { // heap[1] is top() + heap[1] = new HeapedDisiDoc(disi, docNr); + downHeap(); + return true; + } else { + return false; + } + } + } + + /** Returns the least DocIdSetIterator of the DisiDocQueue in constant time. + * Should not be used when the queue is empty. + */ + public final DocIdSetIterator top() { + return topHDD.disi; + } + + /** Returns document number of the least Scorer of the ScorerDocQueue + * in constant time. + * Should not be used when the queue is empty. + */ + public final int topDoc() { + return topHDD.doc; + } + + public final boolean topNextAndAdjustElsePop() throws IOException { + return checkAdjustElsePop( topHDD.disi.next()); + } + + public final boolean topSkipToAndAdjustElsePop(int target) throws IOException { + return checkAdjustElsePop( topHDD.disi.skipTo(target)); + } + + private final boolean checkAdjustElsePop(boolean cond) { + if (cond) { // see also adjustTop + topHDD.doc = topHDD.disi.doc(); + } else { // see also popNoResult + heap[1] = heap[size]; // move last to first + heap[size] = null; + size--; + } + downHeap(); + return cond; + } + + /** Removes and returns the least disi of the DisiDocQueue in log(size) + * time. + * Should not be used when the queue is empty. + */ + public final DocIdSetIterator pop() { + DocIdSetIterator result = topHDD.disi; + popNoResult(); + return result; + } + + /** Removes the least disi of the DisiDocQueue in log(size) time. 
+ * Should not be used when the queue is empty. + */ + private final void popNoResult() { + heap[1] = heap[size]; // move last to first + heap[size] = null; + size--; + downHeap(); // adjust heap + } + + /** Should be called when the disi at top changes doc() value. + * Still log(n) worst case, but it's at least twice as fast to
+   *  { pq.top().change(); pq.adjustTop(); }
+   * 
instead of
+   *  { o = pq.pop(); o.change(); pq.push(o); }
+   * 
+ */ + public final void adjustTop() { + topHDD.adjust(); + downHeap(); + } + + /** Returns the number of disis currently stored in the DisiDocQueue. */ + public final int size() { + return size; + } + + /** Removes all entries from the DisiDocQueue. */ + public final void clear() { + for (int i = 0; i <= size; i++) { + heap[i] = null; + } + size = 0; + } + + private final void upHeap() { + int i = size; + HeapedDisiDoc node = heap[i]; // save bottom node + int j = i >>> 1; + while ((j > 0) && (node.doc < heap[j].doc)) { + heap[i] = heap[j]; // shift parents down + i = j; + j = j >>> 1; + } + heap[i] = node; // install saved node + topHDD = heap[1]; + } + + private final void downHeap() { + int i = 1; + HeapedDisiDoc node = heap[i]; // save top node + int j = i << 1; // find smaller child + int k = j + 1; + if ((k <= size) && (heap[k].doc < heap[j].doc)) { + j = k; + } + while ((j <= size) && (heap[j].doc < node.doc)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= size && (heap[k].doc < heap[j].doc)) { + j = k; + } + } + heap[i] = node; // install saved node + topHDD = heap[1]; + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DocSetFactory.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DocSetFactory.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/DocSetFactory.java (revision 0) @@ -0,0 +1,125 @@ +package org.apache.lucene.kamikaze.docidset.utils; + +import org.apache.lucene.kamikaze.docidset.api.DocSet; +import org.apache.lucene.kamikaze.docidset.impl.AbstractDocSet; +import org.apache.lucene.kamikaze.docidset.impl.IntArrayDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OBSDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet; + + +/** + * Utility class to make appropriate measurement calls to recognize optimal + * representation for an 
ordered document set based on hints provided and + * min/max/count values on the docset if available. + * + * + * @author abhasin + */ +public class DocSetFactory +{ + + + private static enum ACT { MIN, MAX, COUNT}; + + private final static int INT_SIZE = 32; + private final static int LONG_SHIFT = 6; + private final static int BITSET_COMP_SWAP_RATIO = 15; + private static int DEFAULT_MIN = 0; + private static int DEFAULT_MAX = 3000000; + private static int DEFAULT_COUNT = 1000; + private static long DEFAULT_INVOKE = 10000L; + private static long INVOKE = DEFAULT_INVOKE; + private static long INT_ARRAY_MAX = 500000; + + + public static enum FOCUS {PERFORMANCE, SPACE, OPTIMAL}; + + + public static DocSet getDocSetInstance(int min, int max, int count, FOCUS hint) + { + // Default to Medians + if( min==-1||max==-1 || count==-1) + { + min = DEFAULT_MIN; + max = DEFAULT_MAX; + count = DEFAULT_COUNT; + + } + else + { + bucket(min, ACT.MIN); + bucket(max, ACT.MAX); + bucket(count, ACT.COUNT); + } + + + INVOKE++; + if(INVOKE==Long.MAX_VALUE) + INVOKE=10000L; + + switch(hint) + { + // Always Favor IntArray or OpenBitSet + case PERFORMANCE: + if((((max-min)>>>LONG_SHIFT)+1)*2*INT_SIZE > count * INT_SIZE) + return new IntArrayDocIdSet(count); + else + //return new IntArrayDocIdSet(count); + return new OBSDocIdSet(max-min+1); + + // Always Favor BitSet or Compression + case SPACE: + if((max-min)/countBITSET_COMP_SWAP_RATIO) + { + if(count < AbstractDocSet.DEFAULT_BATCH_SIZE) + return new IntArrayDocIdSet(count); + else + return new P4DDocIdSet(); + } + else if((((max-min)>>>LONG_SHIFT)+1)*2*INT_SIZE > count * INT_SIZE) + return new IntArrayDocIdSet(count); + else + return new OBSDocIdSet(max-min+1); + + + } + + return new IntArrayDocIdSet(count); + + + } + + private static void bucket(int val, ACT act ) { + + switch (act) + { + case MIN: + { + DEFAULT_MIN = (int) ((DEFAULT_MIN*INVOKE + val)/(INVOKE+1)); + break; + } + + case MAX: + { + DEFAULT_MAX = (int) ((DEFAULT_MAX*INVOKE 
+ val)/(INVOKE+1)); + break; + } + case COUNT: + { + DEFAULT_COUNT = (int) ((DEFAULT_COUNT*INVOKE + val)/(INVOKE+1)); + break; + } + + } + } + + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/IntArray.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/IntArray.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/IntArray.java (revision 0) @@ -0,0 +1,100 @@ +/** + * Bobo Browse Engine - High performance faceted/parametric search implementation + * that handles various types of semi-structured data. Written in Java. + * + * Copyright (C) 2005-2006 John Wang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * To contact the project administrators for the bobo-browse project, + * please go to https://sourceforge.net/projects/bobo-browse/, or + * send mail to owner@browseengine.com. 
+ */ + +package org.apache.lucene.kamikaze.docidset.utils; + +import java.io.Serializable; + + +/** + * + */ +public class IntArray extends PrimitiveArray implements Serializable { + + private static final long serialVersionUID = 1L; + + public IntArray(int len) { + super(len); + } + + public IntArray() { + super(); + } + + public void add(int val) { + ensureCapacity(_count + 1); + int[] array = (int[]) _array; + array[_count] = val; + _count++; + } + + + public void set(int index, int val) { + ensureCapacity(index); + int[] array = (int[]) _array; + array[index] = val; + _count = Math.max(_count, index + 1); + } + + public int get(int index) { + int[] array = (int[]) _array; + return array[index]; + } + + public boolean contains(int elem) { + int size = this.size(); + for (int i = 0; i < size; ++i) { + if (get(i) == elem) + return true; + } + return false; + } + + @Override + protected Object buildArray(int len) { + return new int[len]; + } + + + + public static int binarySearch(int[] a, int fromIndex, int toIndex,int key) { + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int midVal = a[mid]; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return mid; // key found + } + return -(low + 1); // key not found. 
+ } + +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/LongSegmentArray.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/LongSegmentArray.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/LongSegmentArray.java (revision 0) @@ -0,0 +1,40 @@ +package org.apache.lucene.kamikaze.docidset.utils; + +import java.io.Serializable; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; + + +public class LongSegmentArray extends PrimitiveArray implements Serializable{ + + + public LongSegmentArray(int len) { + super(len); + } + + public LongSegmentArray() { + super(); + } + + protected Object buildArray(int len) { + return new long[len][]; + } + + public void add(long[] val) { + ensureCapacity(_count + 1); + long[][] array = (long[][]) _array; + array[_count] = val; + _count++; + } + + + public void set(int index, long[] ref) { + ensureCapacity(index); + ((long[][])_array)[index] = ref; + _count = Math.max(_count, index + 1); + } + + public long[] get(int index) { + return ((long[][])_array)[index]; + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/MyOpenBitSetArray.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/MyOpenBitSetArray.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/MyOpenBitSetArray.java (revision 0) @@ -0,0 +1,44 @@ +package org.apache.lucene.kamikaze.docidset.utils; + +import java.io.Serializable; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; + + +/** Dynamic Array to hold MyOpenBitSets + * + * author@abhasin + */ +public class MyOpenBitSetArray extends PrimitiveArray implements Serializable{ + + + public MyOpenBitSetArray(int len) { + super(len); + } + + public MyOpenBitSetArray() { + super(); + } + + protected Object 
buildArray(int len) { + return new MyOpenBitSet[len]; + } + + public void add(MyOpenBitSet val) { + ensureCapacity(_count + 1); + MyOpenBitSet[] array = (MyOpenBitSet[]) _array; + array[_count] = val; + _count++; + } + + + public void set(int index, MyOpenBitSet ref) { + ensureCapacity(index); + ((MyOpenBitSet[])_array)[index] = ref; + _count = Math.max(_count, index + 1); + } + + public MyOpenBitSet get(int index) { + return ((MyOpenBitSet[])_array)[index]; + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/PrimitiveArray.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/PrimitiveArray.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/PrimitiveArray.java (revision 0) @@ -0,0 +1,147 @@ +/** + * Bobo Browse Engine - High performance faceted/parametric search implementation + * that handles various types of semi-structured data. Written in Java. + * + * Copyright (C) 2005-2006 John Wang + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * To contact the project administrators for the bobo-browse project, + * please go to https://sourceforge.net/projects/bobo-browse/, or + * send mail to owner@browseengine.com. 
+ */ + +package org.apache.lucene.kamikaze.docidset.utils; + +import java.io.Serializable; +import java.lang.reflect.Array; + +public abstract class PrimitiveArray implements Serializable { + protected Object _array; + + protected int _count; + + protected int _growth; + + protected int _len; + + private static final int DEFAULT_SIZE = 1000; + + protected abstract Object buildArray(int len); + + protected PrimitiveArray(int len) { + super(); + if (len <= 0) + throw new IllegalArgumentException("len must be greater than 0: " + len); + _array = buildArray(len); + _count = 0; + _growth = 10; + _len = len; + } + + protected PrimitiveArray() { + this(DEFAULT_SIZE); + } + + public void clear() { + _count = 0; + _growth = 10; + } + + protected synchronized void expand() { + expand(_len + 100); + } + + protected synchronized void expand(int idx) { + if (idx <= _len) + return; + int oldLen = _len; + _len = idx + _growth; + Object newArray = buildArray(_len); + System.arraycopy(_array, 0, newArray, 0, oldLen); + _growth += _len; + _array = newArray; + } + + public synchronized void ensureCapacity(int idx) { + expand(idx); + } + + public int size() { + return _count; + } + + /** + * called to shrink the array size to the current # of elements to save + * memory. 
+ * + */ + public synchronized void seal() { + if (_len > _count) { + Object newArray = buildArray(_count); + System.arraycopy(_array, 0, newArray, 0, _count); + _array = newArray; + _len = _count; + } + _growth = 10; + } + + public synchronized T[] toArray(T[] array) { + System.arraycopy(_array, 0, array, 0, _count); + return array; + } + + public synchronized Object toArray() { + Object array = buildArray(_count); + System.arraycopy(_array, 0, array, 0, _count); + return array; + } + + @SuppressWarnings("unchecked") + @Override + public PrimitiveArray clone() { + PrimitiveArray obj; + try { + obj = this.getClass().newInstance(); + obj._count = _count; + obj._growth = _growth; + obj._len = _len; + + Object newArray = buildArray(_len); + System.arraycopy(_array, 0, newArray, 0, _count); + obj._array = newArray; + return obj; + } catch (Exception e) { + throw new RuntimeException(e.getMessage()); + } + } + + public String toString() { + StringBuffer buffer = new StringBuffer("["); + for (int i = 0; i < _count; ++i) { + if (i != 0) { + buffer.append(", "); + } + buffer.append(Array.get(_array, i)); + } + buffer.append(']'); + + return buffer.toString(); + } + + public int length() { + return _len; + } +} Index: contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/TestSizeEstimates.java =================================================================== --- contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/TestSizeEstimates.java (revision 0) +++ contrib/kamikaze/src/org/apache/lucene/kamikaze/docidset/utils/TestSizeEstimates.java (revision 0) @@ -0,0 +1,269 @@ +package org.apache.lucene.kamikaze.docidset.utils; + +import java.lang.reflect.Array; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.BitSet; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.kamikaze.docidset.utils.IntArray; +import org.apache.lucene.kamikaze.docidset.utils.LongSegmentArray; +import 
org.apache.lucene.kamikaze.docidset.utils.MyOpenBitSetArray;


/**
 * Command-line utility that prints rough heap-size estimates for several
 * container types. Each estimate allocates 100 filled instances and divides
 * the growth in used heap (measured after forced GCs) by 100.
 */
public class TestSizeEstimates
{

  public static void estimateIntArraySize(int size)
  {
    report(IntArray.class, size);
  }


  public static void estimateBitSetSize(int size)
  {
    report(BitSet.class, size);
  }

  public static void estimateMyOpenBitSetSize(int size)
  {
    report(MyOpenBitSet.class, size);
  }



  private static void estimateArrayListSize(int size) {
    report(ArrayList.class, size);
  }

  private static void estimateMyOpenBitSetArraySize(int userData) {
    report(MyOpenBitSetArray.class, userData);
  }


  private static void estimateLongSegmentArraySize(int userData) {
    report(LongSegmentArray.class, userData);
  }

  /** Prints "className:estimate" for one type; failures are only logged. */
  private static void report(Class<?> clazz, int userData) {
    try {
      System.out.println(clazz.getName()+":"+sizeOf(clazz,userData));
    } catch (Exception e) {
      e.printStackTrace();
    }
  }


  /**
   * Estimates the per-instance heap size of {@code clazz}.
   *
   * <p>Instances are created through a {@code (long)} constructor if one
   * exists, otherwise through an {@code (int)} constructor, then filled and
   * optimized.
   *
   * @param clazz    the type to measure
   * @param userData passed to the constructor and used as the fill size
   * @return the rounded average growth in used heap per instance, or 0 if
   *         the type could not be instantiated
   */
  public static long sizeOf(Class<?> clazz, int userData) {
    long size= 0;
    Object[] objects = new Object[100];
    try {

      Constructor<?> c;
      try{
        c = clazz.getConstructor(long.class);
      }
      catch(Exception e)
      {
        c = null;
      }

      if(c == null)
        c= clazz.getConstructor(int.class);

      // create one instance before measuring, as the original did
      Object primer = c.newInstance(userData);
      long startingMemoryUse = getUsedMemory();
      for (int i = 0; i < objects.length; i++) {
        objects[i] = c.newInstance(userData);
        fill(objects[i], userData);
        optimize(objects[i]);
      }
      long endingMemoryUse = getUsedMemory();
      float approxSize = (endingMemoryUse -
                          startingMemoryUse)/100f ;
      size = Math.round(approxSize);
    } catch (Exception e) {
      System.out.println("WARNING:couldn't instantiate"
                         +clazz);
      e.printStackTrace();
    }
    return size;
  }

  /** Same measurement as {@link #sizeOf}, for plain {@code int[]} arrays. */
  private static void estimateNativeIntArraySize(int userData) {

    long size= 0;
    Object[] objects = new Object[100];
    try {

      long startingMemoryUse = getUsedMemory();
      for (int i = 0; i < objects.length; i++) {
        objects[i] = (int[])Array.newInstance(int.class, userData);
        fill(objects[i], userData);
        optimize(objects[i]);
      }
      long endingMemoryUse = getUsedMemory();
      float approxSize = (endingMemoryUse -
                          startingMemoryUse) /100f;
      size = Math.round(approxSize);
    } catch (Exception e) {
      e.printStackTrace();
      System.out.println("WARNING:couldn't instantiate Native Int Array");

    }
    System.out.println(int[].class.getName()+":"+size);
  }


  /** Populates {@code object} according to its runtime type. */
  @SuppressWarnings("unchecked")
  private static void fill(Object object, int userData) {

    if(object instanceof MyOpenBitSet)
      ((MyOpenBitSet)object).set(userData-1);
    else if(object instanceof ArrayList)
    {
      for(int i = 0 ;i < userData; i++ )
      {
        // a distinct Integer object per slot, as in the original
        ((ArrayList)object).add(new Integer(10));
      }
    }
    else if(object instanceof IntArray)
    {
      for(int i = 0 ;i < userData; i++ )
      {
        ((IntArray)object).set(i, 10);
      }
    }
    else if(object instanceof int[] )
    {
      for(int i = 0 ;i < userData; i++ )
      {
        ( (int[])object)[i] = 10;
      }
    }
    else if(object instanceof long[] )
    {
      for(int i = 0 ;i < userData; i++ )
      {
        ( (long[])object)[i] = 10;
      }
    }
    else if(object instanceof MyOpenBitSetArray)
    {
      for(int i = 0 ;i < userData; i++ )
      {

        ((MyOpenBitSetArray) object).add(new MyOpenBitSet(1200));
        fill(((MyOpenBitSetArray) object).get(i) ,1200);
      }
    }
    else if(object instanceof LongSegmentArray)
    {
      for(int i = 0 ;i < userData; i++ )
      {
        ((LongSegmentArray) object).add(new long[2000>>>6]);
        fill(((LongSegmentArray) object) .get(i),2000>>>6);
      }
    }

  }

  /** Trims {@code object} to its contents where the type supports it. */
  private static void optimize(Object object) {

    if(object instanceof MyOpenBitSet)
      ((MyOpenBitSet)object).trimTrailingZeros();
    else if(object instanceof ArrayList)
      ((ArrayList<?>)object).trimToSize();
    else if(object instanceof IntArray)
      ((IntArray)object).seal();



  }


  /** @return total minus free heap, each read after a forced GC pass */
  private static long getUsedMemory() {
    gc();
    long totalMemory = Runtime.getRuntime().totalMemory();
    gc();
    long freeMemory = Runtime.getRuntime().freeMemory();
    long usedMemory = totalMemory - freeMemory;
    return usedMemory;
  }

  /** Requests GC and finalization twice, pausing 100 ms after each request. */
  private static void gc() {
    try {
      System.gc();
      Thread.sleep(100);
      System.runFinalization();
      Thread.sleep(100);
      System.gc();
      Thread.sleep(100);
      System.runFinalization();
      Thread.sleep(100);

    } catch (InterruptedException e) {
      // restore the interrupt flag instead of swallowing it
      Thread.currentThread().interrupt();
      e.printStackTrace();
    }
  }

  public static void main(String[] args) {
    //estimateMyOpenBitSetSize(1200);
    //estimateBitSetSize(1200);
    estimateIntArraySize(1);
    estimateIntArraySize(10);
    estimateIntArraySize(100);
    estimateIntArraySize(1000);
    estimateIntArraySize(10000);


    //estimateArrayListSize(1024);
    //estimateMyOpenBitSetArraySize(32000);
    estimateLongSegmentArraySize(1);
    estimateLongSegmentArraySize(10);
    estimateLongSegmentArraySize(100);
    estimateLongSegmentArraySize(1000);
    estimateLongSegmentArraySize(10000);

    estimateNativeIntArraySize(1);
    estimateNativeIntArraySize(10);
    estimateNativeIntArraySize(100);
    estimateNativeIntArraySize(1000);
    estimateNativeIntArraySize(10000);



    //estimateLongSegmentArraySize(32000);
    System.exit(0);
  }




}
Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/KamikazeTest.java
===================================================================
--- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/KamikazeTest.java (revision 0)
+++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/KamikazeTest.java (revision 0) @@ -0,0 +1,170 @@ +package org.apache.lucene.kamikaze.test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Random; + +import junit.framework.TestCase; + +import org.apache.lucene.kamikaze.docidset.api.DocSet; +import org.apache.lucene.kamikaze.docidset.impl.AndDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OrDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet; +import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.OpenBitSet; + + + +public class KamikazeTest extends TestCase +{ + + public void testMultipleIntersections() throws Exception + { + + System.out.println("Running Multiple Intersections Test case..."); + System.out.println("-------------------------------------------"); + + ArrayList obs = new ArrayList(); + ArrayList docs = new ArrayList(); + Random rand = new Random(System.currentTimeMillis()); + int maxDoc = 350000; + for(int i=0; i < 3; ++i) + { + int numdocs = rand.nextInt(maxDoc); + ArrayList nums = new + ArrayList(); + HashSet seen = new HashSet(); + for (int j = 0; j < numdocs; j++) + { + int nextDoc = rand.nextInt(maxDoc); + if(seen.contains(nextDoc)) + { + while(seen.contains(nextDoc)) + { + nextDoc = + rand.nextInt(maxDoc); + } + } + nums.add(nextDoc); + seen.add(nextDoc); + } + Collections.sort(nums); + obs.add(createObs(nums, maxDoc)); + docs.add(createDocSet(nums)); + } + OpenBitSet base = obs.get(0); + for(int i = 1; i < obs.size(); ++i) + { + base.intersect(obs.get(i)); + } + + AndDocIdSet ands = new AndDocIdSet(docs); + long card1 = base.cardinality(); + long card2 = ands.size(); + //System.out.println(card1+":"+card2); + assertEquals(card1, card2); + } + + + private OpenBitSet createObs(ArrayList 
nums, int maxDoc) { + OpenBitSet bitSet = new OpenBitSet(maxDoc); + for(int num:nums) + bitSet.set(num); + return bitSet; + } + + private DocIdSet createDocSet(ArrayList nums) throws Exception{ + DocSet p4d = DocSetFactory.getDocSetInstance(0, 35000000, 200000, + DocSetFactory.FOCUS.OPTIMAL); + for(int num:nums) + p4d.addDoc(num); + return p4d; + } + + + public void testForOutOfBounds() throws Exception + { + + System.out.println("Running OutOfBounds Test case..."); + System.out.println("-------------------------------------------"); + + Random rand = new Random(System.currentTimeMillis()); + int maxDoc = 350000; + ArrayList nums = new ArrayList(); + HashSet seen = new HashSet(); + for(int i=0; i < 68; ++i) + { + int nextDoc=rand.nextInt(maxDoc); + if(seen.contains(nextDoc)) + { + while(seen.contains(nextDoc)) + { + nextDoc += rand.nextInt(maxDoc); + } + } + nums.add(nextDoc); + seen.add(nextDoc); + } + Collections.sort(nums); + DocSet docs = new P4DDocIdSet(); + boolean saw403 = false; + for (Integer integer : nums) + { + saw403=(integer == 403); + docs.addDoc(integer); + } + boolean got = docs.find(403); + assertEquals(saw403, got); + } + + public void testPartialEmptyAnd() throws IOException + { + try + { + System.out.println("Running Partial Empty And Test case..."); + System.out.println("-------------------------------------------"); + + DocSet ds1 = new P4DDocIdSet(); + DocSet ds2 = new P4DDocIdSet(); + ds2.addDoc(42); + ds2.addDoc(43); + ds2.addDoc(44); + ArrayList docs = new +ArrayList(); + docs.add(ds1); + docs.add(ds2); + OrDocIdSet orlist1 = new OrDocIdSet(docs); + DocSet ds3 = new P4DDocIdSet(); + DocSet ds4 = new P4DDocIdSet(); + ds4.addDoc(42); + ds4.addDoc(43); + ds4.addDoc(44); + ArrayList docs2 = new +ArrayList(); + docs2.add(ds3); + docs2.add(ds4); + OrDocIdSet orlist2 = new OrDocIdSet(docs2); + ArrayList docs3 = new +ArrayList(); + docs3.add(orlist1); + docs3.add(orlist2); + AndDocIdSet andlist = new AndDocIdSet(docs3); + + DocIdSetIterator iter 
= andlist.iterator(); + @SuppressWarnings("unused") + int docId = -1; + while((docId = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + { + } + } + catch(Exception e) + { + e.printStackTrace(); + } + assertTrue(true); + } +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocIdSetSuite.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocIdSetSuite.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocIdSetSuite.java (revision 0) @@ -0,0 +1,10 @@ +package org.apache.lucene.kamikaze.test; + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; +import org.junit.runners.Suite.SuiteClasses; + +@RunWith(Suite.class) +@SuiteClasses( { KamikazeTest.class, TestMultiThreadedAccess.class, TestDocSets.class, TestParameterizedDocSets.class, TestDocSetSerialization.class, TestDocSetFactory.class }) +public class TestDocIdSetSuite { +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetFactory.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetFactory.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetFactory.java (revision 0) @@ -0,0 +1,81 @@ +package org.apache.lucene.kamikaze.test; + +import static org.junit.Assert.assertEquals; + +import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory; +import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory.FOCUS; +import org.apache.lucene.search.DocIdSet; +import org.junit.Test; + +public class TestDocSetFactory { + + private static int batch = 128; + + private static String serial = "SerialDocSet"; + + public TestDocSetFactory() { + + } + + + + + @Test + public void testDocSetFactory() { + + + int min = 44; + int max = 533222; + int count = 100; + int sparseThresholdCount = 500000; + + DocIdSet set = DocSetFactory.getDocSetInstance(min, 
max, count, FOCUS.PERFORMANCE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet"); + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet"); + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet"); + + min = 10; + max = 25000000; + count = 100; + + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet"); + + count *=10000; + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet"); + + max = 1000000000; + count*=1000; + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.OBSDocIdSet"); + + min = 10; + max = 30000000; + count = 10000000; + + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.OBSDocIdSet"); + + count /= 10000000; + + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet"); + + min = 10; + max = 30000000; + count = 10000000; + + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.PERFORMANCE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.OBSDocIdSet"); + + count /= 10000000; + + set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.PERFORMANCE); + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet"); + + + } +} \ No newline at end of file Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetSerialization.java 
=================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetSerialization.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSetSerialization.java (revision 0) @@ -0,0 +1,827 @@ +package org.apache.lucene.kamikaze.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.kamikaze.docidset.api.DocSet; +import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator; +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.kamikaze.docidset.impl.AndDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.IntArrayDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.NotDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OBSDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OrDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.OpenBitSet; +import org.junit.Test; + + +public class TestDocSetSerialization { + + private static int batch = 128; + + private static String serial = "SerialDocSet"; + + public TestDocSetSerialization() { + + } + + + + + @Test + public void testAndDocSetSerialization() throws Exception{ + + System.out.println(""); + System.out.println("Running AndDocIdSet Serialization Test case..."); + System.out.println("----------------------------"); + int set1[] = { 8, 27, 30, 35, 53, 
59, 71, 74, 87, 92, 104, 113, 122, 126, + 135, 135, 137, 138, 185, 186, 188, 192, 197, 227, 227, 230, 242, 252, + 255, 259, 267, 270, 271, 289, 298, 305, 311, 312, 325, 335, 337, 346, + 351, 360, 365, 371, 375, 380, 387, 391, 406, 407, 408, 419, 425, 430, + 443, 451, 454, 456, 464, 466, 469, 473, 478, 483, 496, 502, 517, 527, + 531, 578, 601, 605, 625, 626, 632, 638, 641, 648, 652, 653, 667, 677, + 682, 697, 700, 711, 713, 733, 764, 780, 782, 796, 798, 801, 804, 812, + 817, 831, 835, 849, 859, 872, 886, 891, 893, 895, 903, 908, 914, 915, + 916, 917, 920, 921, 926, 944, 947, 950, 956, 962, 964, 969, 979, 986, + 994, 996, 1018, 1019, 1022, 1025, 1029, 1029, 1039, 1058, 1062, 1063, + 1064, 1068, 1069, 1071, 1075, 1082, 1085, 1096, 1098, 1102, 1103, 1104, + 1104, 1119, 1120, 1122, 1122, 1123, 1147, 1149, 1179, 1183, 1195, 1197, + 1200, 1201, 1214, 1215, 1220, 1221, 1221, 1225, 1229, 1252, 1260, 1261, + 1268, 1269, 1274, 1279, 1293, 1336, 1336, 1348, 1369, 1370, 1375, 1394, + 1401, 1414, 1444, 1453, 1459, 1468, 1473, 1473, 1474, 1485, 1502, 1505, + 1506, 1517, 1518, 1520, 1521, 1522, 1528, 1537, 1543, 1549, 1550, 1560, + 1565, 1566, 1585, 1599, 1604, 1619, 1637, 1650, 1658, 1679, 1684, 1691, + 1691, 1701, 1701, 1715, 1719, 1720, 1722, 1740, 1740, 1748, 1752, 1756, + 1756, 1776, 1796, 1799, 1799, 1800, 1809, 1811, 1828, 1829, 1849, 1859, + 1865, 1868, 1886, 1900, 1933, 1955, 1959, 1983, 1985, 1999, 2003, 2003, + 2029, 2038, 2048, 2050, 2054, 2056, 2059, 2060, 2079, 2095, 2099, 2104, + 2111, 2113, 2119, 2119, 2122, 2123, 2141, 2142, 2145, 2148, 2160, 2182, + 2183, 2200, 2203, 2209, 2210, 2221, 2232, 2261, 2267, 2268, 2272, 2283, + 2297, 2298, 2313, 2314, 2316, 2316, 2331, 2332, 2338, 2343, 2345, 2350, + 2350, 2365, 2378, 2384, 2392, 2399, 2414, 2420, 2425, 2433, 2445, 2457, + 2461, 2462, 2463, 2497, 2503, 2519, 2522, 2533, 2556, 2568, 2577, 2578, + 2578, 2585, 2589, 2603, 2603, 2613, 2616, 2648, 2651, 2662, 2666, 2667, + 2672, 2675, 2679, 2691, 2694, 2694, 2699, 2706, 
2708, 2709, 2711, 2711, + 2732, 2736, 2738, 2749, 2750, 2763, 2764, 2770, 2775, 2781, 2793, 2811, + 2817, 2834, 2842, 2847, 2848, 2852, 2856, 2870, 2872, 2876, 2879, 2887, + 2897, 2903, 2980, 2984, 2994, 2997 }; + int set2[] = { 7, 21, 29, 31, 35, 37, 62, 64, 67, 72, 77, 88, 90, 96, 98, + 116, 152, 154, 156, 162, 163, 173, 179, 188, 189, 201, 203, 217, 224, + 233, 263, 267, 271, 277, 294, 301, 311, 336, 343, 349, 390, 395, 396, + 401, 407, 411, 414, 425, 432, 436, 444, 468, 476, 483, 492, 496, 497, + 501, 508, 513, 517, 519, 531, 541, 543, 552, 555, 555, 568, 571, 587, + 589, 594, 601, 604, 606, 625, 633, 634, 645, 649, 654, 655, 662, 664, + 665, 666, 671, 671, 678, 690, 693, 697, 708, 714, 723, 726, 743, 746, + 747, 772, 784, 806, 811, 812, 824, 834, 836, 844, 850, 863, 867, 890, + 890, 896, 905, 931, 933, 934, 940, 952, 959, 963, 968, 974, 978, 997, + 997, 1013, 1015, 1019, 1023, 1030, 1033, 1035, 1047, 1048, 1054, 1069, + 1087, 1147, 1156, 1158, 1165, 1175, 1199, 1211, 1224, 1252, 1255, 1256, + 1259, 1274, 1280, 1283, 1290, 1292, 1292, 1294, 1297, 1299, 1300, 1301, + 1312, 1323, 1337, 1340, 1351, 1352, 1356, 1363, 1385, 1392, 1395, 1399, + 1409, 1413, 1429, 1437, 1460, 1461, 1465, 1466, 1468, 1482, 1497, 1500, + 1501, 1508, 1517, 1524, 1524, 1529, 1530, 1538, 1538, 1544, 1545, 1552, + 1556, 1561, 1566, 1569, 1583, 1598, 1606, 1610, 1613, 1634, 1642, 1643, + 1656, 1675, 1682, 1704, 1708, 1711, 1711, 1719, 1724, 1736, 1740, 1741, + 1766, 1772, 1774, 1777, 1784, 1793, 1814, 1829, 1833, 1843, 1856, 1857, + 1870, 1874, 1879, 1884, 1886, 1890, 1901, 1909, 1912, 1940, 1944, 1946, + 1947, 1948, 1955, 1962, 1971, 1982, 1989, 1995, 1997, 2012, 2015, 2021, + 2043, 2046, 2049, 2055, 2064, 2068, 2069, 2083, 2088, 2100, 2117, 2122, + 2126, 2132, 2143, 2148, 2152, 2152, 2153, 2159, 2173, 2176, 2198, 2198, + 2201, 2205, 2206, 2207, 2211, 2222, 2230, 2254, 2256, 2264, 2268, 2317, + 2318, 2319, 2330, 2334, 2344, 2353, 2353, 2354, 2369, 2374, 2376, 2392, + 2402, 2403, 2414, 2417, 
2422, 2424, 2435, 2445, 2461, 2475, 2530, 2539, + 2541, 2542, 2565, 2566, 2571, 2572, 2577, 2579, 2581, 2582, 2586, 2592, + 2595, 2600, 2642, 2645, 2645, 2651, 2668, 2676, 2699, 2705, 2705, 2709, + 2715, 2720, 2720, 2736, 2753, 2756, 2761, 2788, 2792, 2793, 2796, 2801, + 2815, 2834, 2842, 2857, 2859, 2859, 2861, 2865, 2869, 2875, 2879, 2884, + 2885, 2895, 2901, 2906, 2912, 2935, 2940, 2957, 2958, 2967, 2969, 2976, + 2978, 2981, 2984, 2994, 2997 }; + int set3[] = { 2994, 2997 }; + + P4DDocIdSet pset1 = new P4DDocIdSet(batch); + DocSet pset2 = new OBSDocIdSet(3000); + P4DDocIdSet pset3 = new P4DDocIdSet(batch); + + for (int i = 0; i < set1.length; i++) { + pset1.addDoc(set1[i]); + pset2.addDoc(set2[i]); + + } + for (int i = 0; i < set3.length; i++) { + pset3.addDoc(set3[i]); + } + + List its = new ArrayList(); + its.add(pset1); + its.add(pset2); + its.add(pset3); + AndDocIdSet andSet = new AndDocIdSet(its); + + try { + File f = new File(serial); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(andSet); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + org.apache.lucene.search.DocIdSetIterator dcit = andSet.iterator(); + int x = set1.length - 2; + AndDocIdSet andSet2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + andSet2 = (AndDocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + org.apache.lucene.search.DocIdSetIterator dcit2 = andSet2.iterator(); + x = 0; + try { + while (x < 2) { + int docid1 = dcit.nextDoc(); + int docid2 = dcit2.nextDoc(); + assertEquals(docid1, set3[x++]); + assertEquals(docid1, docid2); + } + } catch (IOException e) { + fail(e.getMessage()); + } + } + + @Test + public void testOrDocSetSerialization() throws Exception{ + System.out.println(""); + System.out.println("Running 
OrDocIdSet Serializaition Test case..."); + System.out.println("----------------------------"); + + int set0[] = { 9, 20, 31, 42, 65, 76, 87, 108, 119, 130, 141, 152, 163, + 186, 197, 208, 219, 230, 241, 265, 276, 287, 298, 309, 332, 343, 354, + 365, 376, 387, 410, 421, 431, 442, 453, 476, 487, 498, 509, 520, 531, + 554, 565, 575, 586, 597, 608, 619, 630, 653, 664, 675, 686, 697, 708, + 717, 728, 739, 750, 773, 784, 814, 820, 831, 842, 853 }; + int set1[] = { 8, 19, 30, 53, 64, 75, 86, 96, 107, 118, 129, 140, 151, 174, + 185, 196, 207, 218, 229, 252, 264, 275, 286, 297, 320, 331, 342, 353, + 364, 375, 398, 409, 420, 430, 441, 464, 475, 486, 497, 508, 519, 542, + 553, 564, 574, 585, 596, 607, 618, 641, 652, 663, 674, 685, 696, 716, + 727, 738, 761, 772, 783, 802, 813, 819, 830, 841 }; + int set2[] = { 7, 41, 52, 63, 74, 85, 106, 117, 128, 139, 162, 173, 184, + 195, 206, 217, 240, 251, 263, 274, 285, 308, 319, 330, 341, 352, 363, + 386, 397, 408, 419, 429, 452, 463, 474, 485, 496, 507, 530, 541, 552, + 563, 573, 584, 595, 606, 629, 640, 651, 662, 673, 684, 707, 715, 726, + 749, 760, 771, 782, 791, 801, 812, 818, 829, 852, 858 }; + int set3[] = { 6, 29, 40, 51, 62, 73, 84, 105, 116, 127, 150, 161, 172, + 183, 194, 205, 228, 239, 250, 262, 273, 296, 307, 318, 329, 340, 351, + 374, 385, 396, 407, 418, 440, 451, 462, 473, 484, 495, 518, 529, 540, + 551, 562, 572, 583, 594, 617, 628, 639, 650, 661, 672, 695, 706, 714, + 737, 748, 759, 770, 781, 790, 793, 800, 811, 840, 851 }; + int set4[] = { 17, 28, 39, 50, 61, 72, 95, 104, 115, 138, 149, 160, 171, + 182, 193, 216, 227, 238, 249, 260, 261, 284, 295, 306, 317, 328, 339, + 362, 373, 384, 395, 406, 417, 439, 450, 461, 472, 483, 506, 517, 528, + 539, 550, 561, 582, 605, 616, 627, 638, 649, 660, 683, 694, 705, 725, + 736, 747, 758, 769, 780, 789, 799, 810, 828, 839, 850 }; + int set5[] = { 5, 16, 27, 38, 49, 60, 83, 94, 103, 126, 137, 148, 159, 170, + 181, 204, 215, 226, 237, 248, 259, 272, 283, 294, 305, 316, 327, 350, + 
361, 372, 383, 394, 405, 428, 438, 449, 460, 471, 494, 505, 516, 527, + 538, 549, 593, 604, 615, 626, 637, 648, 671, 682, 693, 704, 724, 735, + 746, 757, 768, 788, 792, 798, 809, 827, 838, 849 }; + int set6[] = { 4, 15, 26, 37, 48, 71, 82, 93, 114, 125, 136, 147, 158, 169, + 192, 203, 214, 225, 236, 247, 271, 282, 293, 304, 315, 338, 349, 360, + 371, 382, 393, 416, 427, 437, 448, 459, 482, 493, 504, 515, 526, 537, + 560, 571, 581, 592, 603, 614, 625, 636, 659, 670, 681, 692, 703, 723, + 734, 745, 756, 779, 787, 796, 797, 826, 837, 848 }; + int set7[] = { 3, 14, 25, 36, 59, 70, 81, 92, 102, 113, 124, 135, 146, 157, + 180, 191, 202, 213, 224, 235, 258, 270, 281, 292, 303, 326, 337, 348, + 359, 370, 381, 404, 415, 426, 436, 447, 470, 481, 492, 503, 514, 525, + 548, 559, 570, 580, 591, 602, 613, 624, 647, 658, 669, 680, 691, 702, + 722, 733, 744, 767, 778, 795, 808, 825, 836, 847 }; + int set8[] = { 2, 13, 24, 47, 58, 69, 80, 91, 101, 112, 123, 134, 145, 168, + 179, 190, 201, 212, 223, 246, 257, 269, 280, 291, 314, 325, 336, 347, + 358, 369, 392, 403, 414, 425, 435, 458, 469, 480, 491, 502, 513, 536, + 547, 558, 569, 579, 590, 601, 612, 635, 646, 657, 668, 679, 690, 713, + 721, 732, 755, 766, 777, 786, 794, 807, 824, 835 }; + int set9[] = { 1, 10, 12, 21, 32, 35, 43, 46, 54, 57, 68, 77, 79, 88, 90, + 97, 100, 111, 120, 122, 131, 133, 142, 153, 156, 164, 167, 175, 178, + 189, 198, 200, 209, 211, 220, 231, 234, 242, 245, 253, 256, 266, 268, + 277, 279, 288, 299, 302, 310, 313, 321, 324, 335, 344, 346, 355, 357, + 366, 377, 380, 388, 391, 399, 402, 413, 422, 424, 432, 443, 446, 454, + 457, 465, 468, 479, 488, 490, 499, 501, 510, 521, 524, 532, 535, 543, + 546, 557, 566, 568, 578, 587, 589, 598, 600, 609, 620, 623, 631, 634, + 642, 645, 656, 665, 667, 676, 678, 687, 698, 701, 709, 712, 718, 720, + 729, 740, 743, 751, 754, 762, 765, 776, 785, 803, 806, 817, 821, 823, + 832, 843, 846, 854 }; + int set10[] = { 23, 34, 45, 56, 67, 78, 99, 110, 121, 144, 155, 166, 177, + 188, 
199, 222, 233, 244, 255, 267, 290, 301, 312, 323, 334, 345, 368, + 379, 390, 401, 412, 423, 434, 445, 456, 467, 478, 489, 512, 523, 534, + 545, 556, 567, 577, 588, 611, 622, 633, 644, 655, 666, 689, 700, 711, + 731, 742, 753, 764, 775, 805, 816, 834, 845, 856, 857 }; + int set11[] = { 11, 22, 33, 44, 55, 66, 89, 98, 109, 132, 143, 154, 165, + 176, 187, 210, 221, 232, 243, 254, 278, 289, 300, 311, 322, 333, 356, + 367, 378, 389, 400, 411, 433, 444, 455, 466, 477, 500, 511, 522, 533, + 544, 555, 576, 599, 610, 621, 632, 643, 654, 677, 688, 699, 710, 719, + 730, 741, 752, 763, 774, 804, 815, 822, 833, 844, 855 }; + + int set12[] = { 857, 858 }; + + int result[] = {}; + OpenBitSet ps0 = new MyOpenBitSet(); + for (int i = 0; i < set0.length; i++) + ps0.set(set0[i]); + + MyOpenBitSet ps1 = new MyOpenBitSet(); + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + MyOpenBitSet ps2 = new MyOpenBitSet(); + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + MyOpenBitSet ps3 = new MyOpenBitSet(); + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + MyOpenBitSet ps4 = new MyOpenBitSet(); + for (int i = 0; i < set4.length; i++) + ps4.set(set4[i]); + + MyOpenBitSet ps5 = new MyOpenBitSet(); + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + MyOpenBitSet ps6 = new MyOpenBitSet(); + for (int i = 0; i < set6.length; i++) + ps6.set(set6[i]); + + MyOpenBitSet ps7 = new MyOpenBitSet(); + for (int i = 0; i < set7.length; i++) + ps7.set(set7[i]); + + MyOpenBitSet ps8 = new MyOpenBitSet(); + for (int i = 0; i < set8.length; i++) + ps8.set(set8[i]); + + MyOpenBitSet ps9 = new MyOpenBitSet(); + for (int i = 0; i < set9.length; i++) + ps9.set(set9[i]); + + MyOpenBitSet ps10 = new MyOpenBitSet(); + for (int i = 0; i < set10.length; i++) + ps10.set(set10[i]); + + MyOpenBitSet ps11 = new MyOpenBitSet(); + for (int i = 0; i < set11.length; i++) + ps11.set(set11[i]); + + ArrayList sets = new ArrayList(); + sets.add(ps0); + sets.add(ps1); + 
sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + sets.add(ps6); + sets.add(ps7); + sets.add(ps8); + sets.add(ps9); + sets.add(ps10); + sets.add(ps11); + + OrDocIdSet ord = new OrDocIdSet(sets); + org.apache.lucene.search.DocIdSetIterator dcit = ord.iterator(); + + try { + File f = new File(serial); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(ord); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + OrDocIdSet ord2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + ord2 = (OrDocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + int x = 0; + org.apache.lucene.search.DocIdSetIterator dcit2 = ord2.iterator(); + try { + while (x < result.length) { + int docid1 = dcit.nextDoc(); + int docid2 = dcit2.nextDoc(); + assertEquals(docid1, result[x++]); + assertEquals(docid1,docid2); + } + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test + public void testNotDocSetSerialization() throws Exception { + + System.out.println(""); + System.out.println("Running NotDocIdSet Serialization Test case..."); + System.out.println("----------------------------"); + + Random random = new Random(); + + int randomizer = 0; + int b = 0; + int length = 1000; + int max = 5400; + + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + + for (int i = 1; i < 1000 + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + b = randomizer + (int) (random.nextDouble() * 1000); + intSet.add(b); + + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + + DocIdSet not = new NotDocIdSet(docSet, max); + + try { + File f = new File(serial); + 
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(not); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + NotDocIdSet not2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + not2 = (NotDocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + org.apache.lucene.search.DocIdSetIterator noit = not.iterator(); + org.apache.lucene.search.DocIdSetIterator noit2 = not2.iterator(); + + try { + int docid; + while ((docid=noit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + int docid2 = noit2.nextDoc(); + assertFalse(intSet.contains(docid)); + assertEquals(docid, docid2); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + + @Test + public void testP4DDocSetSerialization() throws Exception{ + System.out.println(""); + System.out.println("Running P4DDocIdSet Serialization Test case..."); + System.out.println("----------------------------"); + + int result[] = {}; + P4DDocIdSet docSet = new P4DDocIdSet(batch); + + for (int i = 0; i < result.length; i++) { + docSet.addDoc(result[i]); + } + + try { + File f = new File(serial); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(docSet); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + DocIdSet docSet2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + docSet2 = (DocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + StatefulDSIterator dcit1 = docSet.iterator(); + org.apache.lucene.search.DocIdSetIterator dcit2 = docSet2.iterator(); + + try { + for (int i = 0; i < result.length; 
i++) { + int docid1 = dcit1.nextDoc(); + int docid2 = dcit2.nextDoc(); + assertEquals(docid1, result[i]); + assertEquals(docid1, docid2); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + + @Test + public void testIntArrayDocSetSerialization() throws Exception{ + + System.out.println(""); + System.out.println("Running IntArrayDocIdSet Serialization Test case..."); + System.out.println("----------------------------"); + + int result[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, + 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, + 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, + 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, + 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, + 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, + 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, + 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, + 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, + 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, + 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, + 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, + 317, 
318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, + 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, + 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, + 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, + 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, + 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, + 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, + 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, + 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, + 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, + 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, + 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, + 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, + 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, + 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, + 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, + 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, + 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, + 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, + 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, + 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, + 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, + 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, + 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, + 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, + 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, + 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 
707, 708, + 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, + 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, + 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, + 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, + 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, + 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, + 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, + 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, + 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, + 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, + 849, 850, 851, 852, 853, 854, 855, 856, 857, 858 }; + IntArrayDocIdSet docSet = new IntArrayDocIdSet(batch); + + for (int i = 0; i < result.length; i++) { + docSet.addDoc(result[i]); + } + + try { + File f = new File(serial); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(docSet); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + DocIdSet docSet2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + docSet2 = (DocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + StatefulDSIterator dcit1 = docSet.iterator(); + org.apache.lucene.search.DocIdSetIterator dcit2 = docSet2.iterator(); + + try { + for (int i = 0; i < result.length; i++) { + int docid1 = dcit1.nextDoc(); + int docid2 = dcit2.nextDoc(); + assertEquals(docid1, result[i]); + assertEquals(docid1, docid2); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + @Test + public void testOBSDocSetSerialization() throws Exception{ + + System.out.println(""); + System.out.println("Running OBSDocIdSet 
Serialization Test case..."); + System.out.println("----------------------------"); + + int result[] = {}; + OBSDocIdSet docSet = new OBSDocIdSet(858); + + for (int i = 0; i < result.length; i++) { + docSet.addDoc(result[i]); + } + + try { + File f = new File(serial); + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); + oos.writeObject(docSet); + oos.flush(); + oos.close(); + + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + DocIdSet docSet2 = null; + + try { + InputStream f = new FileInputStream(new File(serial)); + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f)); + docSet2 = (DocIdSet) (ois.readObject()); + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + StatefulDSIterator dcit1 = docSet.iterator(); + org.apache.lucene.search.DocIdSetIterator dcit2 = docSet2.iterator(); + + try { + for (int i = 0; i < result.length; i++) { + int docid1 = dcit1.nextDoc(); + int docid2 = dcit2.nextDoc(); + assertEquals(docid1, result[i]); + assertEquals(docid1, docid2); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSets.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSets.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestDocSets.java (revision 0) @@ -0,0 +1,1645 @@ +package org.apache.lucene.kamikaze.test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Random; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.lucene.kamikaze.docidset.api.DocSet; +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; 
+import org.apache.lucene.kamikaze.docidset.impl.AndDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.IntArrayDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.NotDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OBSDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OrDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet; +import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory; +import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory.FOCUS; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.OpenBitSet; +import org.junit.Test; + + +public class TestDocSets { + + private static final FOCUS SPACE = null; + private static int batch = 128; + + public TestDocSets() { + + } + + @Test + public void testOrBoundary() throws IOException + { + + System.out.println(""); + System.out.println("Running Or Boundary Test case..."); + System.out.println("----------------------------"); + + DocSet dset1 = new IntArrayDocIdSet(2001); + DocSet dset2 = new IntArrayDocIdSet(2001); + dset1.addDoc(0); + dset2.addDoc(1); + + List sets = new ArrayList(); + sets.add(dset1); + sets.add(dset2); + + + + OrDocIdSet ord = new OrDocIdSet(sets); + DocIdSetIterator dcit = ord.iterator(); + int docid = dcit.nextDoc(); + assertEquals(0,docid); + docid = dcit.nextDoc(); + assertEquals(1,docid); + + } + + + @Test + public void combinationSetsSanityTest() throws IOException + { + System.out.println(""); + System.out.println("Running Combination Sanity Test case..."); + System.out.println("----------------------------"); + + + Set set = new TreeSet(); + Random random = new Random(); + + DocSet dset1 = DocSetFactory.getDocSetInstance(0, 500000, 50000/3, DocSetFactory.FOCUS.OPTIMAL); + DocSet dset2 = DocSetFactory.getDocSetInstance(0, 500000, 50000/3, DocSetFactory.FOCUS.OPTIMAL); + DocSet dset3 = DocSetFactory.getDocSetInstance(0, 500000, 50000/3, 
DocSetFactory.FOCUS.OPTIMAL); + + for(int i=1;i<50001;i++) + { + set.add(random.nextInt(i+1)*10); + } + int i=0; + int s1=0,s2=0,s3=0; + + for(Integer intr : set) + { + if(++i%3==0){ + dset1.addDoc(intr); + s1++; + } + + else if(i%2==0){ + dset2.addDoc(intr); + s2++; + } + else{ + s3++; + dset3.addDoc(intr); + } + } + + assertEquals(s1,dset1.size()); + assertEquals(s2,dset2.size()); + assertEquals(s3,dset3.size()); + + List sets = new ArrayList(); + sets.add(dset1); + sets.add(dset2); + sets.add(dset3); + + OrDocIdSet ord = new OrDocIdSet(sets); + NotDocIdSet not = new NotDocIdSet(ord, 5); + + org.apache.lucene.search.DocIdSetIterator dcit = ord.iterator(); + assertEquals(set.size(),ord.size()); + org.apache.lucene.search.DocIdSetIterator dcit2 = not.iterator(); + + Iterator it = set.iterator(); + int docid; + while((docid = dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + Integer x = it.next(); + assertEquals(x.intValue(),docid); + + } + + + + it = set.iterator(); + while((docid = dcit2.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + Integer x = it.next(); + assertFalse(x.intValue()==docid); + } + + } + + + @Test + public void testSmallSetsCombination() throws Exception{ + System.out.println(""); + System.out.println("Running Small Set And test"); + System.out.println("----------------------------"); + + MyOpenBitSet s1 = new MyOpenBitSet(); + MyOpenBitSet s2 = new MyOpenBitSet(); + MyOpenBitSet s3 = new MyOpenBitSet(); + MyOpenBitSet s4 = new MyOpenBitSet(); + + s1.set(0); + s1.set(4); + s1.set(5); + s1.set(6); + + s2.set(5); + s2.set(6); + + s3.set(1); + s3.set(5); + + ArrayList docSet = new ArrayList(); + + docSet.add(s1); + docSet.add(s2); + docSet.add(s3); + + AndDocIdSet ord = new AndDocIdSet(docSet); + + try { + int docid; + for (DocIdSetIterator dcit = ord.iterator(); (docid = dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS;) + assertEquals(docid, 5); + } catch (IOException e) { + e.printStackTrace(); + } + + System.out.println(""); + 
System.out.println("Running Small Set Not test"); + System.out.println("----------------------------"); + + s1.set(0); + + DocIdSetIterator nsit = new NotDocIdSet(s1, 5).iterator(); + + int i = 1; + try { + int docid; + while ((docid = nsit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(i++, docid); + } catch (IOException e) { + e.printStackTrace(); + } + + } + + @Test + public void testNotDocIdSetSkipSanity() { + + } + + + @Test + public void testOrDocIdSetSkipSanity() { + } + + @Test + public void testCombinationSanitySmallTest() throws Exception{ + System.out.println(""); + System.out.println("Running Combination Small Set Test Case"); + System.out.println("----------------------------"); + + OpenBitSet bs1 = new OpenBitSet(); + OpenBitSet bs2 = new OpenBitSet(); + OpenBitSet bs3 = new OpenBitSet(); + + bs1.set(858); + bs2.set(857); + + ArrayList sets = new ArrayList(); + sets.add(bs1); + sets.add(bs2); + + OrDocIdSet ord = new OrDocIdSet(sets); + + assertEquals(0, ord.findWithIndex(857)); + assertEquals(-1, ord.findWithIndex(1000)); + + + bs3.set(857); + bs3.set(858); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(bs3); + + AndDocIdSet and = new AndDocIdSet(sets2); + org.apache.lucene.search.DocIdSetIterator andit = and.iterator(); + int cursor = 0; + try { + int docid; + while ((docid = andit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(bs3.nextSetBit(docid), docid); + assertEquals(cursor++,and.findWithIndex(docid)); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + } + + + private void _testWideDocSkips(String message, DocIdSet set) throws Exception + { + + System.out.println(message); + + org.apache.lucene.search.DocIdSetIterator dcit = null; + try { + + dcit = set.iterator(); + int docid = dcit.advance(94); + assertEquals(94,docid); + docid = dcit.advance(102); + assertEquals(102,docid); + docid = dcit.advance(500); + assertEquals(500,docid); + docid = dcit.advance(700); + 
assertEquals(700,docid); + //dcit.skipTo(1001); + //assertEquals(1002, dcit.doc()); + docid = dcit.advance(1788); + assertEquals(1788, docid); + docid = dcit.advance(1901); + assertEquals(1902, docid); + docid = dcit.advance(2400); + assertEquals(2400, docid); + docid = dcit.advance(2401); + assertEquals(2403, docid); + //dcit.skipTo(2403); + //assertEquals(2406, dcit.doc()); + + assertEquals(DocIdSetIterator.NO_MORE_DOCS,dcit.advance(450000)); + + + } + catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + + + @Test + public void testWideDocSkips() throws Exception { + + System.out.println(""); + System.out.println("Running Wide Doc Skip sanity test"); + System.out.println("----------------------------"); + + OpenBitSet pset1 = new OpenBitSet(); + OpenBitSet pset2 = new OpenBitSet(); + OpenBitSet pset3 = new OpenBitSet(); + + for (int i = 0; i < 1000; i++) { + pset1.set(i); + pset2.set(i*2); + pset3.set(i*3); + } + + + + List its = new ArrayList(); + its.add(pset1); + its.add(pset2); + its.add(pset3); + OrDocIdSet orSet = new OrDocIdSet(its); + //_testWideDocSkips("Testing skips on OrDocSets", orSet); + + + OpenBitSet pset4 = new OpenBitSet(); + org.apache.lucene.search.DocIdSetIterator orit = orSet.iterator(); + int docid; + while((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + pset4.set(docid); + } + _testWideDocSkips("Testing skips on OpenBitSets", pset4); + + MyOpenBitSet pset5 = new MyOpenBitSet(); + orit = orSet.iterator(); + while((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + pset5.set(docid); + } + _testWideDocSkips("Testing skips on MyOpenBitSets", pset5); + + IntArrayDocIdSet pset6 = new IntArrayDocIdSet(); + orit = orSet.iterator(); + while((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + pset6.addDoc(docid); + } + _testWideDocSkips("Testing skips on IntArrayDocIdSet", pset6); + + P4DDocIdSet pset7 = new P4DDocIdSet(); + orit = orSet.iterator(); + while((docid = 
orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + pset7.addDoc(docid); + } + _testWideDocSkips("Testing skips on P4DDocIdSet", pset7); + + OBSDocIdSet pset8 = new OBSDocIdSet(2000); + orit = orSet.iterator(); + while((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + { + pset8.addDoc(docid); + } + _testWideDocSkips("Testing skips on OBSDocIdSet", pset8); // pset8 is the OBSDocIdSet filled just above + + + NotDocIdSet pset9 = new NotDocIdSet(new NotDocIdSet(orSet, 3001),3001); + _testWideDocSkips("Testing skips on NotDocIdSet", pset9); + + } + + + + @Test + public void testAndDocIdSetSkipSanity() throws Exception{ + + System.out.println(""); + System.out.println("Running AndDocIdSet Skip sanity test"); + System.out.println("----------------------------"); + int set1[] = { 8, 27, 30, 35, 53, 59, 71, 74, 87, 92, 104, 113, 122, 126, + 135, 135, 137, 138, 185, 186, 188, 192, 197, 227, 227, 230, 242, 252, + 255, 259, 267, 270, 271, 289, 298, 305, 311, 312, 325, 335, 337, 346, + 351, 360, 365, 371, 375, 380, 387, 391, 406, 407, 408, 419, 425, 430, + 443, 451, 454, 456, 464, 466, 469, 473, 478, 483, 496, 502, 517, 527, + 531, 578, 601, 605, 625, 626, 632, 638, 641, 648, 652, 653, 667, 677, + 682, 697, 700, 711, 713, 733, 764, 780, 782, 796, 798, 801, 804, 812, + 817, 831, 835, 849, 859, 872, 886, 891, 893, 895, 903, 908, 914, 915, + 916, 917, 920, 921, 926, 944, 947, 950, 956, 962, 964, 969, 979, 986, + 994, 996, 1018, 1019, 1022, 1025, 1029, 1029, 1039, 1058, 1062, 1063, + 1064, 1068, 1069, 1071, 1075, 1082, 1085, 1096, 1098, 1102, 1103, 1104, + 1104, 1119, 1120, 1122, 1122, 1123, 1147, 1149, 1179, 1183, 1195, 1197, + 1200, 1201, 1214, 1215, 1220, 1221, 1221, 1225, 1229, 1252, 1260, 1261, + 1268, 1269, 1274, 1279, 1293, 1336, 1336, 1348, 1369, 1370, 1375, 1394, + 1401, 1414, 1444, 1453, 1459, 1468, 1473, 1473, 1474, 1485, 1502, 1505, + 1506, 1517, 1518, 1520, 1521, 1522, 1528, 1537, 1543, 1549, 1550, 1560, + 1565, 1566, 1585, 1599, 1604, 1619, 1637, 1650, 1658, 1679, 1684, 1691, + 1691, 1701, 1701, 1715, 
1719, 1720, 1722, 1740, 1740, 1748, 1752, 1756, + 1756, 1776, 1796, 1799, 1799, 1800, 1809, 1811, 1828, 1829, 1849, 1859, + 1865, 1868, 1886, 1900, 1933, 1955, 1959, 1983, 1985, 1999, 2003, 2003, + 2029, 2038, 2048, 2050, 2054, 2056, 2059, 2060, 2079, 2095, 2099, 2104, + 2111, 2113, 2119, 2119, 2122, 2123, 2141, 2142, 2145, 2148, 2160, 2182, + 2183, 2200, 2203, 2209, 2210, 2221, 2232, 2261, 2267, 2268, 2272, 2283, + 2297, 2298, 2313, 2314, 2316, 2316, 2331, 2332, 2338, 2343, 2345, 2350, + 2350, 2365, 2378, 2384, 2392, 2399, 2414, 2420, 2425, 2433, 2445, 2457, + 2461, 2462, 2463, 2497, 2503, 2519, 2522, 2533, 2556, 2568, 2577, 2578, + 2578, 2585, 2589, 2603, 2603, 2613, 2616, 2648, 2651, 2662, 2666, 2667, + 2672, 2675, 2679, 2691, 2694, 2694, 2699, 2706, 2708, 2709, 2711, 2711, + 2732, 2736, 2738, 2749, 2750, 2763, 2764, 2770, 2775, 2781, 2793, 2811, + 2817, 2834, 2842, 2847, 2848, 2852, 2856, 2870, 2872, 2876, 2879, 2887, + 2897, 2903, 2980, 2984, 2994, 2997 }; + int set2[] = { 7, 21, 29, 31, 35, 37, 62, 64, 67, 72, 77, 88, 90, 96, 98, + 116, 152, 154, 156, 162, 163, 173, 179, 188, 189, 201, 203, 217, 224, + 233, 263, 267, 271, 277, 294, 301, 311, 336, 343, 349, 390, 395, 396, + 401, 407, 411, 414, 425, 432, 436, 444, 468, 476, 483, 492, 496, 497, + 501, 508, 513, 517, 519, 531, 541, 543, 552, 555, 555, 568, 571, 587, + 589, 594, 601, 604, 606, 625, 633, 634, 645, 649, 654, 655, 662, 664, + 665, 666, 671, 671, 678, 690, 693, 697, 708, 714, 723, 726, 743, 746, + 747, 772, 784, 806, 811, 812, 824, 834, 836, 844, 850, 863, 867, 890, + 890, 896, 905, 931, 933, 934, 940, 952, 959, 963, 968, 974, 978, 997, + 997, 1013, 1015, 1019, 1023, 1030, 1033, 1035, 1047, 1048, 1054, 1069, + 1087, 1147, 1156, 1158, 1165, 1175, 1199, 1211, 1224, 1252, 1255, 1256, + 1259, 1274, 1280, 1283, 1290, 1292, 1292, 1294, 1297, 1299, 1300, 1301, + 1312, 1323, 1337, 1340, 1351, 1352, 1356, 1363, 1385, 1392, 1395, 1399, + 1409, 1413, 1429, 1437, 1460, 1461, 1465, 1466, 1468, 1482, 1497, 1500, + 
1501, 1508, 1517, 1524, 1524, 1529, 1530, 1538, 1538, 1544, 1545, 1552, + 1556, 1561, 1566, 1569, 1583, 1598, 1606, 1610, 1613, 1634, 1642, 1643, + 1656, 1675, 1682, 1704, 1708, 1711, 1711, 1719, 1724, 1736, 1740, 1741, + 1766, 1772, 1774, 1777, 1784, 1793, 1814, 1829, 1833, 1843, 1856, 1857, + 1870, 1874, 1879, 1884, 1886, 1890, 1901, 1909, 1912, 1940, 1944, 1946, + 1947, 1948, 1955, 1962, 1971, 1982, 1989, 1995, 1997, 2012, 2015, 2021, + 2043, 2046, 2049, 2055, 2064, 2068, 2069, 2083, 2088, 2100, 2117, 2122, + 2126, 2132, 2143, 2148, 2152, 2152, 2153, 2159, 2173, 2176, 2198, 2198, + 2201, 2205, 2206, 2207, 2211, 2222, 2230, 2254, 2256, 2264, 2268, 2317, + 2318, 2319, 2330, 2334, 2344, 2353, 2353, 2354, 2369, 2374, 2376, 2392, + 2402, 2403, 2414, 2417, 2422, 2424, 2435, 2445, 2461, 2475, 2530, 2539, + 2541, 2542, 2565, 2566, 2571, 2572, 2577, 2579, 2581, 2582, 2586, 2592, + 2595, 2600, 2642, 2645, 2645, 2651, 2668, 2676, 2699, 2705, 2705, 2709, + 2715, 2720, 2720, 2736, 2753, 2756, 2761, 2788, 2792, 2793, 2796, 2801, + 2815, 2834, 2842, 2857, 2859, 2859, 2861, 2865, 2869, 2875, 2879, 2884, + 2885, 2895, 2901, 2906, 2912, 2935, 2940, 2957, 2958, 2967, 2969, 2976, + 2978, 2981, 2984, 2994, 2997 }; + int set3[] = { 2994, 2997 }; + + P4DDocIdSet pset1 = new P4DDocIdSet(batch); + OpenBitSet pset2 = new OpenBitSet(); + P4DDocIdSet pset3 = new P4DDocIdSet(batch); + + for (int i = 0; i < set1.length; i++) { + pset1.addDoc(set1[i]); + pset2.set(set2[i]); + + } + for (int i = 0; i < set3.length; i++) { + pset3.addDoc(set3[i]); + } + + List its = new ArrayList(); + its.add(pset1); + its.add(pset2); + its.add(pset3); + AndDocIdSet andSet = new AndDocIdSet(its); + org.apache.lucene.search.DocIdSetIterator dcit = andSet.iterator(); + int x = set1.length - 2; + + try { + int docid; + while ((docid = dcit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + assertEquals(set1[x++], docid); + } catch (IOException e) { + fail(e.getMessage()); + } + + System.out.println("Testing Skips"); + 
try { + + dcit = andSet.iterator(); + int docid = dcit.advance(94); + assertEquals(2994,docid); + docid = dcit.advance(102); + assertEquals(2997,docid); + //dcit.skipTo(920); + //assertEquals(2994,dcit.doc()); + //dcit.skipTo(2994); + //assertEquals(2994,dcit.doc()); + //dcit.skipTo(2996); + //assertEquals(2997, dcit.doc()); + //dcit.skipTo(2997); + //assertEquals(2997, dcit.doc()); + assertEquals(dcit.advance(450000),DocIdSetIterator.NO_MORE_DOCS); + assertEquals(dcit.advance(2997),DocIdSetIterator.NO_MORE_DOCS); + assertEquals(0, andSet.findWithIndex(2994)); + + } + catch (IOException e) { + fail(e.getMessage()); + } + + } + + + + + + @Test + public void testCombinationSanity()throws Exception { + + System.out.println(""); + int[] set1 = { 4, 19, 21, 35, 36, 43, 43, 73, 85, 104, 105, 106, 112, 118, + 119, 138, 141, 145, 146, 146, 196, 200, 202, 217, 219, 220, 221, 239, + 242, 243, 261, 276, 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, + 376, 387, 398, 401, 406, 438, 442, 450, 450, 462, 469, 475, 495, 499, + 505, 505, 513, 513, 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, + 635, 644, 646, 650, 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, + 736, 739, 746, 748, 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, + 818, 819, 831, 832, 836, 837, 837, 847, 864, 870, 872, 872, 875, 880, + 885, 899, 905, 914, 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, + 992, 998, 1000, 1031, 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, + 1100, 1100, 1107, 1109, 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, + 1176, 1194, 1200, 1209, 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, + 1252, 1277, 1309, 1322, 1325, 1327, 1327, 1329, 1341, 1341, 1342, 1352, + 1359, 1360, 1361, 1363, 1378, 1390, 1391, 1410, 1418, 1427, 1433, 1438, + 1441, 1448, 1449, 1451, 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, + 1546, 1555, 1556, 1572, 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, + 1638, 1652, 1663, 1664, 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, + 1707, 1708, 1708, 
1716, 1720, 1720, 1723, 1724, 1727, 1727, 1730, 1733, + 1735, 1738, 1750, 1755, 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, + 1848, 1854, 1871, 1888, 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set2 = { 4, 105, 141, 633, 1953, 1962, 1983, 1990, 1999 }; + int[] set3 = { 4, 145, 146, 146, 196, 200, 202, 217, 219, 1999 }; + int[] set4 = { 4, 200, 202, 217, 219, 220, 221, 239, 242, 243, 261, 276, + 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, 376, 387, 398, 401, + 406, 438, 442, 450, 450, 462, 469, 475, 495, 499, 505, 505, 513, 513, + 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, 635, 644, 646, 650, + 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, 736, 739, 746, 748, + 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, 818, 819, 831, 832, + 836, 837, 837, 847, 864, 870, 872, 872, 875, 880, 885, 899, 905, 914, + 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, 992, 998, 1000, 1031, + 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, 1100, 1100, 1107, 1109, + 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, 1176, 1194, 1200, 1209, + 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, 1252, 1277, 1309, 1322, + 1325, 1327, 1327, 1329, 1341, 1341, 1342, 1352, 1359, 1360, 1361, 1363, + 1378, 1390, 1391, 1410, 1418, 1427, 1433, 1438, 1441, 1448, 1449, 1451, + 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, 1546, 1555, 1556, 1572, + 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, 1638, 1652, 1663, 1664, + 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, 1707, 1708, 1708, 1716, + 1720, 1720, 1723, 1724, 1727, 1727, 1730, 1733, 1735, 1738, 1750, 1755, + 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, 1848, 1854, 1871, 1888, + 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set5 = { 4, 1999 }; + int[] set6 = { 2000 }; + + OpenBitSet ps1 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + OpenBitSet ps2 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + 
OpenBitSet ps3 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + P4DDocIdSet ps4 = new P4DDocIdSet(128); + + // Build open bit set + for (int i = 0; i < set4.length; i++) + ps4.addDoc(set4[i]); + + OpenBitSet ps5 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + P4DDocIdSet ps6 = new P4DDocIdSet(128); + ps6.addDoc(2000); + + ArrayList sets = new ArrayList(); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + System.out.println("Running Combination Sanity test CASE 1"); + System.out + .println("TEST CASE : Or first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + OrDocIdSet ord = new OrDocIdSet(sets); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps5); + + AndDocIdSet and = new AndDocIdSet(sets2); + + org.apache.lucene.search.DocIdSetIterator andit = and.iterator(); + + int index = 0; + try { + int docid; + while ((docid = andit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (set5[index++] != docid) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + docid); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements: expected length", + set5.length, index); + System.out.println("Combination sanity CASE 1 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 2"); + System.out + .println("TEST CASE : AND first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + + AndDocIdSet and1 = new AndDocIdSet(sets); + + sets2 = new ArrayList(); + sets2.add(and1); + sets2.add(ps5); + + AndDocIdSet and2 = new AndDocIdSet(sets2); + + andit = and2.iterator(); + + index = 0; + try { + int docid; + while ((docid = andit.nextDoc()) != 
DocIdSetIterator.NO_MORE_DOCS) { + if (set5[index++] != docid) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + docid); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + assertEquals("Error: could not recover all and elements:", set5.length, + index); + System.out.println("Combination sanity CASE 2 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 3"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st should recover set1"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or3 = new OrDocIdSet(sets); + + sets2 = new ArrayList(); + sets2.add(or3); + sets2.add(ps1); + + OrDocIdSet or4 = new OrDocIdSet(sets2); + + org.apache.lucene.search.DocIdSetIterator orit = or4.iterator(); + + index = 0; + int ctr = 0; + try { + int docid; + while ((docid = orit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + index = ps1.nextSetBit(index); + if (index == -1) + System.err.println("Error in combination test: no value expected but was - " + + docid); + else if (index != docid) + System.err.println("Error in combination test: expected - " + + set1[index - 1] + " but was - " + docid); + index++; + ctr++; + } + } catch (IOException e) { + // TODO Auto-generated catch block + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements:", ctr, (int) ps1 + .cardinality()); + System.out.println("Combination sanity CASE 3 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 4"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st and ~{2000} should recover 0-1999"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or5 = new 
OrDocIdSet(sets); + NotDocIdSet not = new NotDocIdSet(ps6, 2001); + + sets2 = new ArrayList(); + sets2.add(or3); + sets2.add(ps1); + sets2.add(not); + + OrDocIdSet or6 = new OrDocIdSet(sets2); + + orit = or6.iterator(); + + index = 0; + ctr = 0; + + try { + int docid; + while ((docid = orit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(index++, docid); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements", set6[0], index); + + System.out.println("Combination sanity CASE 4 complete."); + System.out.println(); + + } + + + + @Test + public void testCombinationSanityWithOBS() throws Exception{ + + System.out.println("Runnning testCombinationSanityWithOBS"); + int[] set1 = { 4, 19, 21, 35, 36, 43, 73, 85, 104, 105, 106, 112, 118, + 119, 138, 141, 145, 146, 196, 200, 202, 217, 219, 220, 221, 239, + 242, 243, 261, 276, 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, + 376, 387, 398, 401, 406, 438, 442, 450, 462, 469, 475, 495, 499, + 505, 513, 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, + 644, 646, 650, 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, + 736, 739, 746, 748, 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, + 818, 819, 831, 832, 836, 837, 847, 864, 870, 872, 875, 880, + 885, 899, 905, 914, 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, + 992, 998, 1000, 1031, 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, + 1100, 1107, 1109, 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, + 1176, 1194, 1200, 1209, 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, + 1252, 1277, 1309, 1322, 1325, 1327, 1329, 1341, 1342, 1352, + 1359, 1360, 1361, 1363, 1378, 1390, 1391, 1410, 1418, 1427, 1433, 1438, + 1441, 1448, 1449, 1451, 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, + 1546, 1555, 1556, 1572, 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, + 1638, 1652, 1663, 1664, 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, + 1707, 1708, 1716, 1720, 1723, 1724, 1727, 1730, 1733, + 1735, 
1738, 1750, 1755, 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, + 1848, 1854, 1871, 1888, 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set2 = { 4, 105, 141, 633, 1953, 1962, 1983, 1990, 1999 }; + int[] set3 = { 4, 145, 146, 146, 196, 200, 202, 217, 219, 1999 }; + int[] set4 = { 4, 200, 202, 217, 219, 220, 221, 239, 242, 243, 261, 276, + 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, 376, 387, 398, 401, + 406, 438, 442, 450, 450, 462, 469, 475, 495, 499, 505, 505, 513, 513, + 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, 635, 644, 646, 650, + 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, 736, 739, 746, 748, + 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, 818, 819, 831, 832, + 836, 837, 837, 847, 864, 870, 872, 872, 875, 880, 885, 899, 905, 914, + 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, 992, 998, 1000, 1031, + 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, 1100, 1100, 1107, 1109, + 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, 1176, 1194, 1200, 1209, + 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, 1252, 1277, 1309, 1322, + 1325, 1327, 1327, 1329, 1341, 1341, 1342, 1352, 1359, 1360, 1361, 1363, + 1378, 1390, 1391, 1410, 1418, 1427, 1433, 1438, 1441, 1448, 1449, 1451, + 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, 1546, 1555, 1556, 1572, + 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, 1638, 1652, 1663, 1664, + 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, 1707, 1708, 1708, 1716, + 1720, 1720, 1723, 1724, 1727, 1727, 1730, 1733, 1735, 1738, 1750, 1755, + 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, 1848, 1854, 1871, 1888, + 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set5 = { 4, 1999 }; + int[] set6 = { 2000 }; + + OBSDocIdSet ps1 = new OBSDocIdSet(3000); + + // Build open bit set + for (int i = 0; i < set1.length; i++) + ps1.addDoc(set1[i]); + + OBSDocIdSet ps2 = new OBSDocIdSet(3000); + + // Build open bit set + for (int i = 0; i < set2.length; i++) + ps2.addDoc(set2[i]); + + + OBSDocIdSet ps3= new OBSDocIdSet(3000); + + // 
Build open bit set + for (int i = 0; i < set3.length; i++) + ps3.addDoc(set3[i]); + + OBSDocIdSet ps4 = new OBSDocIdSet(3000); + + // Build open bit set + for (int i = 0; i < set4.length; i++) + ps4.addDoc(set4[i]); + + OBSDocIdSet ps5 = new OBSDocIdSet(3000); + + // Build open bit set + for (int i = 0; i < set5.length; i++) + ps5.addDoc(set5[i]); + + OBSDocIdSet ps6 = new OBSDocIdSet(3000); + ps6.addDoc(2000); + + ArrayList sets = new ArrayList(); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + System.out.println("Running Combination Sanity test CASE 1"); + System.out + .println("TEST CASE : Or first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + OrDocIdSet ord = new OrDocIdSet(sets); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps5); + + AndDocIdSet and = new AndDocIdSet(sets2); + + org.apache.lucene.search.DocIdSetIterator andit = and.iterator(); + + int index = 0; + try { + int docid; + while ((docid = andit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + if (set5[index++] != docid) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + docid); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements: expected length", + set5.length, index); + System.out.println("Combination sanity CASE 1 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 2"); + System.out + .println("TEST CASE : AND first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + + AndDocIdSet and1 = new AndDocIdSet(sets); + + sets2 = new ArrayList(); + sets2.add(and1); + sets2.add(ps5); + + AndDocIdSet and2 = new AndDocIdSet(sets2); + + andit = and2.iterator(); + + index = 0; + try { + int docid; + while ((docid = andit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + if 
(set5[index++] != docid) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + docid); + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + assertEquals("Error: could not recover all and elements:", set5.length, + index); + System.out.println("Combination sanity CASE 2 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 3"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st should recover set1"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or3 = new OrDocIdSet(sets); + + sets2 = new ArrayList(); + sets2.add(or3); + sets2.add(ps1); + + OrDocIdSet or4 = new OrDocIdSet(sets2); + + org.apache.lucene.search.DocIdSetIterator orit = or4.iterator(); + + index = 0; + int ctr = 0; + try { + int docid; + while ((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + index = ps1.findWithIndex(docid); + assertFalse("Error in combination test: no value expected but was - " + + docid,index==-1); + assertFalse("Error in combination test: expected - " + + set1[index] + " but was - " + docid, set1[index] != docid); + + + } + } catch (IOException e) { + // TODO Auto-generated catch block + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements:", set1.length, (int) ps1.size()); + System.out.println("Combination sanity CASE 3 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 4"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st and ~{2000} should recover 0-1999"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or5 = new OrDocIdSet(sets); + NotDocIdSet not = new NotDocIdSet(ps6, 2001); + + sets2 = new ArrayList(); 
+ sets2.add(or3); + sets2.add(ps1); + sets2.add(not); + + OrDocIdSet or6 = new OrDocIdSet(sets2); + + orit = or6.iterator(); + + index = 0; + ctr = 0; + + try { + int docid; + while ((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(index++, docid); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + assertEquals("Error: could not recover all and elements", set6[0], index); + + System.out.println("Combination sanity CASE 4 complete."); + System.out.println(); + + } + + + @Test + public void testWideCombinationCase() throws Exception { + System.out.println(""); + System.out.println("Running Wide Combination Test case..."); + System.out.println("----------------------------"); + + int set0[] = { 9, 20, 31, 42, 65, 76, 87, 108, 119, 130, 141, 152, 163, + 186, 197, 208, 219, 230, 241, 265, 276, 287, 298, 309, 332, 343, 354, + 365, 376, 387, 410, 421, 431, 442, 453, 476, 487, 498, 509, 520, 531, + 554, 565, 575, 586, 597, 608, 619, 630, 653, 664, 675, 686, 697, 708, + 717, 728, 739, 750, 773, 784, 814, 820, 831, 842, 853 }; + int set1[] = { 8, 19, 30, 53, 64, 75, 86, 96, 107, 118, 129, 140, 151, 174, + 185, 196, 207, 218, 229, 252, 264, 275, 286, 297, 320, 331, 342, 353, + 364, 375, 398, 409, 420, 430, 441, 464, 475, 486, 497, 508, 519, 542, + 553, 564, 574, 585, 596, 607, 618, 641, 652, 663, 674, 685, 696, 716, + 727, 738, 761, 772, 783, 802, 813, 819, 830, 841 }; + int set2[] = { 7, 41, 52, 63, 74, 85, 106, 117, 128, 139, 162, 173, 184, + 195, 206, 217, 240, 251, 263, 274, 285, 308, 319, 330, 341, 352, 363, + 386, 397, 408, 419, 429, 452, 463, 474, 485, 496, 507, 530, 541, 552, + 563, 573, 584, 595, 606, 629, 640, 651, 662, 673, 684, 707, 715, 726, + 749, 760, 771, 782, 791, 801, 812, 818, 829, 852, 858 }; + int set3[] = { 6, 29, 40, 51, 62, 73, 84, 105, 116, 127, 150, 161, 172, + 183, 194, 205, 228, 239, 250, 262, 273, 296, 307, 318, 329, 340, 351, + 374, 385, 396, 407, 418, 440, 451, 462, 473, 484, 495, 518, 529, 540, + 551, 562, 
572, 583, 594, 617, 628, 639, 650, 661, 672, 695, 706, 714, + 737, 748, 759, 770, 781, 790, 793, 800, 811, 840, 851 }; + int set4[] = { 17, 28, 39, 50, 61, 72, 95, 104, 115, 138, 149, 160, 171, + 182, 193, 216, 227, 238, 249, 260, 261, 284, 295, 306, 317, 328, 339, + 362, 373, 384, 395, 406, 417, 439, 450, 461, 472, 483, 506, 517, 528, + 539, 550, 561, 582, 605, 616, 627, 638, 649, 660, 683, 694, 705, 725, + 736, 747, 758, 769, 780, 789, 799, 810, 828, 839, 850 }; + int set5[] = { 5, 16, 27, 38, 49, 60, 83, 94, 103, 126, 137, 148, 159, 170, + 181, 204, 215, 226, 237, 248, 259, 272, 283, 294, 305, 316, 327, 350, + 361, 372, 383, 394, 405, 428, 438, 449, 460, 471, 494, 505, 516, 527, + 538, 549, 593, 604, 615, 626, 637, 648, 671, 682, 693, 704, 724, 735, + 746, 757, 768, 788, 792, 798, 809, 827, 838, 849 }; + int set6[] = { 4, 15, 26, 37, 48, 71, 82, 93, 114, 125, 136, 147, 158, 169, + 192, 203, 214, 225, 236, 247, 271, 282, 293, 304, 315, 338, 349, 360, + 371, 382, 393, 416, 427, 437, 448, 459, 482, 493, 504, 515, 526, 537, + 560, 571, 581, 592, 603, 614, 625, 636, 659, 670, 681, 692, 703, 723, + 734, 745, 756, 779, 787, 796, 797, 826, 837, 848 }; + int set7[] = { 3, 14, 25, 36, 59, 70, 81, 92, 102, 113, 124, 135, 146, 157, + 180, 191, 202, 213, 224, 235, 258, 270, 281, 292, 303, 326, 337, 348, + 359, 370, 381, 404, 415, 426, 436, 447, 470, 481, 492, 503, 514, 525, + 548, 559, 570, 580, 591, 602, 613, 624, 647, 658, 669, 680, 691, 702, + 722, 733, 744, 767, 778, 795, 808, 825, 836, 847 }; + int set8[] = { 2, 13, 24, 47, 58, 69, 80, 91, 101, 112, 123, 134, 145, 168, + 179, 190, 201, 212, 223, 246, 257, 269, 280, 291, 314, 325, 336, 347, + 358, 369, 392, 403, 414, 425, 435, 458, 469, 480, 491, 502, 513, 536, + 547, 558, 569, 579, 590, 601, 612, 635, 646, 657, 668, 679, 690, 713, + 721, 732, 755, 766, 777, 786, 794, 807, 824, 835 }; + int set9[] = { 1, 10, 12, 21, 32, 35, 43, 46, 54, 57, 68, 77, 79, 88, 90, + 97, 100, 111, 120, 122, 131, 133, 142, 153, 156, 164, 167, 
175, 178, + 189, 198, 200, 209, 211, 220, 231, 234, 242, 245, 253, 256, 266, 268, + 277, 279, 288, 299, 302, 310, 313, 321, 324, 335, 344, 346, 355, 357, + 366, 377, 380, 388, 391, 399, 402, 413, 422, 424, 432, 443, 446, 454, + 457, 465, 468, 479, 488, 490, 499, 501, 510, 521, 524, 532, 535, 543, + 546, 557, 566, 568, 578, 587, 589, 598, 600, 609, 620, 623, 631, 634, + 642, 645, 656, 665, 667, 676, 678, 687, 698, 701, 709, 712, 718, 720, + 729, 740, 743, 751, 754, 762, 765, 776, 785, 803, 806, 817, 821, 823, + 832, 843, 846, 854 }; + int set10[] = { 23, 34, 45, 56, 67, 78, 99, 110, 121, 144, 155, 166, 177, + 188, 199, 222, 233, 244, 255, 267, 290, 301, 312, 323, 334, 345, 368, + 379, 390, 401, 412, 423, 434, 445, 456, 467, 478, 489, 512, 523, 534, + 545, 556, 567, 577, 588, 611, 622, 633, 644, 655, 666, 689, 700, 711, + 731, 742, 753, 764, 775, 805, 816, 834, 845, 856, 857 }; + int set11[] = { 11, 22, 33, 44, 55, 66, 89, 98, 109, 132, 143, 154, 165, + 176, 187, 210, 221, 232, 243, 254, 278, 289, 300, 311, 322, 333, 356, + 367, 378, 389, 400, 411, 433, 444, 455, 466, 477, 500, 511, 522, 533, + 544, 555, 576, 599, 610, 621, 632, 643, 654, 677, 688, 699, 710, 719, + 730, 741, 752, 763, 774, 804, 815, 822, 833, 844, 855 }; + + int set12[] = { 857, 858 }; + + int result[] = {}; + OpenBitSet ps0 = new OpenBitSet(); + for (int i = 0; i < set0.length; i++) + ps0.set(set0[i]); + + OpenBitSet ps1 = new OpenBitSet(); + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + OpenBitSet ps2 = new OpenBitSet(); + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + OpenBitSet ps3 = new OpenBitSet(); + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + OpenBitSet ps4 = new OpenBitSet(); + for (int i = 0; i < set4.length; i++) + ps4.set(set4[i]); + + OpenBitSet ps5 = new OpenBitSet(); + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + OpenBitSet ps6 = new OpenBitSet(); + for (int i = 0; i < set6.length; i++) + ps6.set(set6[i]); + + OpenBitSet 
ps7 = new OpenBitSet(); + for (int i = 0; i < set7.length; i++) + ps7.set(set7[i]); + + OpenBitSet ps8 = new OpenBitSet(); + for (int i = 0; i < set8.length; i++) + ps8.set(set8[i]); + + OpenBitSet ps9 = new OpenBitSet(); + for (int i = 0; i < set9.length; i++) + ps9.set(set9[i]); + + OpenBitSet ps10 = new OpenBitSet(); + for (int i = 0; i < set10.length; i++) + ps10.set(set10[i]); + + OpenBitSet ps11 = new OpenBitSet(); + for (int i = 0; i < set11.length; i++) + ps11.set(set11[i]); + + ArrayList sets = new ArrayList(); + sets.add(ps0); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + sets.add(ps6); + sets.add(ps7); + sets.add(ps8); + sets.add(ps9); + sets.add(ps10); + sets.add(ps11); + + OrDocIdSet ord = new OrDocIdSet(sets); + org.apache.lucene.search.DocIdSetIterator dcit = ord.iterator(); + + int x = 0; + try { + int docid; + while ((docid = dcit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(docid, result[x++]); + } + } catch (IOException e) { + e.printStackTrace(); + fail(e.getMessage()); + } + + OpenBitSet ps12 = new OpenBitSet(); + for (int i = 0; i < set12.length; i++) + ps12.set(set12[i]); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps12); + + AndDocIdSet andSet = new AndDocIdSet(sets2); + org.apache.lucene.search.DocIdSetIterator andit = andSet.iterator(); + + x = 0; + try { + int docid; + while ((docid = andit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) + assertEquals(set12[x++], docid); + } catch (IOException e) { + e.printStackTrace(); + + } + System.out.println(""); + + } + + /* + public void testDenseConstructionTime() + { + System.out.println(""); + System.out.println("Running Dense construction time test case..."); + System.out.println("----------------------------"); + + for(int test=0;test<5;test++) + { + long time = System.nanoTime(); + IntArrayDocIdSet docSet = new IntArrayDocIdSet(20000000); + for(int i=0;i<20000000;i++) + { + docSet.addDoc(i); + } + 
System.out.println("Time for IntArray construction:"+(System.nanoTime()-time)+" ns"); + time = System.nanoTime(); + OBSDocIdSet docSet2 = new OBSDocIdSet(20000000); + for(int i=0;i<20000000;i++) + { + docSet2.addDoc(i); + } + System.out.println("Time for OpenBitSet construction:"+(System.nanoTime()-time)+" ns"); + time = System.nanoTime(); + P4DDocIdSet docSet3 = new P4DDocIdSet(); + for(int i=0;i<20000000;i++) + { + docSet3.addDoc(i+test); + } + System.out.println("Time for P4D Set construction:"+(System.nanoTime()-time)+" ns"); + + } + }*/ + + + @Test + public void testContainsCalls() throws Exception + { + System.out.println(""); + System.out.println("Running Find time test case..."); + System.out.println("----------------------------"); + + for(int test=0;test<5;test++) + { + IntArrayDocIdSet docSet = new IntArrayDocIdSet(20000000); + for(int i=0;i<20000000;i++) + { + docSet.addDoc(i); + } + long time = System.nanoTime(); + for(int i=0;i<20000000;i+=5) + docSet.find(i); + System.out.println("Time for"+(20000000/5)+ " IntArray Find:"+(System.nanoTime()-time)+" ns"); + time = System.nanoTime(); + + OBSDocIdSet docSet2 = new OBSDocIdSet(20000000); + for(int i=0;i<20000000;i++) + { + docSet2.addDoc(i); + } + time = System.nanoTime(); + for(int i=0;i<20000000;i+=5) + docSet2.find(i); + System.out.println("Time for"+(20000000/5)+ " OBSDocSet Find:"+(System.nanoTime()-time)+" ns"); + + time = System.nanoTime(); + P4DDocIdSet docSet3 = new P4DDocIdSet(); + for(int i=0;i<20000000;i++) + { + docSet3.addDoc(i+5); + } + + time = System.nanoTime(); + for(int i=0;i<20000000;i+=6) + docSet3.find(i); + System.out.println("Time for"+(20000000/6)+ " P4D Find:"+(System.nanoTime()-time)+" ns"); + + + } + } + + @Test + public void testFindOnP4D() + { + System.out.println(""); + System.out.println("Running testFindOnP4D..."); + System.out.println("----------------------------"); + + + P4DDocIdSet docSet3 = new P4DDocIdSet(); + ArrayList list = new ArrayList(); + for(int 
i=0;i<20000000;i+=5) + { + list.add(i); + docSet3.addDoc(i); + } + assertEquals(false,docSet3.find(3)); + + for(Integer val: list) + { + assertEquals(true, docSet3.find(val)); + assertEquals(false, docSet3.find(val-1)); + } + + list.clear(); + docSet3 = new P4DDocIdSet(); + for(int i=0;i<20000000;i+=6) + { + list.add(i); + docSet3.addDoc(i); + } + + for(Integer val: list) + { + assertEquals(true, docSet3.find(val)); + assertEquals(false, docSet3.find(val+2)); + } + + list.clear(); + + docSet3 = new P4DDocIdSet(); + assertFalse(docSet3.find(34)); + for(int i=1;i<257;i++) + { + list.add(i); + + docSet3.addDoc(i); + } + + + for(Integer val : list) + { + + assertEquals(true, docSet3.find(val)); + assertEquals(false, docSet3.find(val+258)); + assertEquals(false,docSet3.find(555)); + + } + assertEquals(false, docSet3.find(258)); + + + list.clear(); + + docSet3 = new P4DDocIdSet(); + assertFalse(docSet3.find(34)); + for(int i=1;i<33;i++) + { + list.add(i); + + docSet3.addDoc(i); + } + + + for(Integer val : list) + { + + assertEquals(true, docSet3.find(val)); + assertEquals(false, docSet3.find(val+258)); + assertEquals(false,docSet3.find(555)); + + } + assertEquals(false, docSet3.find(258)); + + + } + + @Test + public void testWideCombinationCase2() throws IOException { + System.out.println(""); + System.out.println("Running Wide Combination Test case 2..."); + System.out.println("----------------------------"); + + int set0[] = { 9, 20, 31, 42, 65, 76, 87, 108, 119, 130, 141, 152, 163, + 186, 197, 208, 219, 230, 241, 265, 276, 287, 298, 309, 332, 343, 354, + 365, 376, 387, 410, 421, 431, 442, 453, 476, 487, 498, 509, 520, 531, + 554, 565, 575, 586, 597, 608, 619, 630, 653, 664, 675, 686, 697, 708, + 717, 728, 739, 750, 773, 784, 814, 820, 831, 842, 853 }; + int set1[] = { 8, 19, 30, 53, 64, 75, 86, 96, 107, 118, 129, 140, 151, 174, + 185, 196, 207, 218, 229, 252, 264, 275, 286, 297, 320, 331, 342, 353, + 364, 375, 398, 409, 420, 430, 441, 464, 475, 486, 497, 508, 519, 
542, + 553, 564, 574, 585, 596, 607, 618, 641, 652, 663, 674, 685, 696, 716, + 727, 738, 761, 772, 783, 802, 813, 819, 830, 841 }; + int set2[] = { 7, 18, 41, 52, 63, 74, 85, 106, 117, 128, 139, 162, 173, + 184, 195, 206, 217, 240, 251, 263, 274, 285, 308, 319, 330, 341, 352, + 363, 386, 397, 408, 419, 429, 452, 463, 474, 485, 496, 507, 530, 541, + 552, 563, 573, 584, 595, 606, 629, 640, 651, 662, 673, 684, 707, 715, + 726, 749, 760, 771, 782, 791, 801, 812, 818, 829, 852 }; + int set3[] = { 6, 29, 40, 51, 62, 73, 84, 105, 116, 127, 150, 161, 172, + 183, 194, 205, 228, 239, 250, 262, 273, 296, 307, 318, 329, 340, 351, + 374, 385, 396, 407, 418, 440, 451, 462, 473, 484, 495, 518, 529, 540, + 551, 562, 572, 583, 594, 617, 628, 639, 650, 661, 672, 695, 706, 714, + 737, 748, 759, 770, 781, 790, 793, 800, 811, 840, 851 }; + int set4[] = { 17, 28, 39, 50, 61, 72, 95, 104, 115, 138, 149, 160, 171, + 182, 193, 216, 227, 238, 249, 260, 261, 284, 295, 306, 317, 328, 339, + 362, 373, 384, 395, 406, 417, 439, 450, 461, 472, 483, 506, 517, 528, + 539, 550, 561, 582, 605, 616, 627, 638, 649, 660, 683, 694, 705, 725, + 736, 747, 758, 769, 780, 789, 799, 810, 828, 839, 850 }; + int set5[] = { 5, 16, 27, 38, 49, 60, 83, 94, 103, 126, 137, 148, 159, 170, + 181, 204, 215, 226, 237, 248, 259, 272, 283, 294, 305, 316, 327, 350, + 361, 372, 383, 394, 405, 428, 438, 449, 460, 471, 494, 505, 516, 527, + 538, 549, 593, 604, 615, 626, 637, 648, 671, 682, 693, 704, 724, 735, + 746, 757, 768, 788, 792, 798, 809, 827, 838, 849 }; + int set6[] = { 4, 15, 26, 37, 48, 71, 82, 93, 114, 125, 136, 147, 158, 169, + 192, 203, 214, 225, 236, 247, 271, 282, 293, 304, 315, 338, 349, 360, + 371, 382, 393, 416, 427, 437, 448, 459, 482, 493, 504, 515, 526, 537, + 560, 571, 581, 592, 603, 614, 625, 636, 659, 670, 681, 692, 703, 723, + 734, 745, 756, 779, 787, 796, 797, 826, 837, 848 }; + int set7[] = { 3, 14, 25, 36, 59, 70, 81, 92, 102, 113, 124, 135, 146, 157, + 180, 191, 202, 213, 224, 235, 258, 270, 281, 
292, 303, 326, 337, 348, + 359, 370, 381, 404, 415, 426, 436, 447, 470, 481, 492, 503, 514, 525, + 548, 559, 570, 580, 591, 602, 613, 624, 647, 658, 669, 680, 691, 702, + 722, 733, 744, 767, 778, 795, 808, 825, 836, 847 }; + int set8[] = { 2, 13, 24, 47, 58, 69, 80, 91, 101, 112, 123, 134, 145, 168, + 179, 190, 201, 212, 223, 246, 257, 269, 280, 291, 314, 325, 336, 347, + 358, 369, 392, 403, 414, 425, 435, 458, 469, 480, 491, 502, 513, 536, + 547, 558, 569, 579, 590, 601, 612, 635, 646, 657, 668, 679, 690, 713, + 721, 732, 755, 766, 777, 786, 794, 807, 824, 835 }; + int set9[] = { 1, 10, 12, 21, 32, 35, 43, 46, 54, 57, 68, 77, 79, 88, 90, + 97, 100, 111, 120, 122, 131, 133, 142, 153, 156, 164, 167, 175, 178, + 189, 198, 200, 209, 211, 220, 231, 234, 242, 245, 253, 256, 266, 268, + 277, 279, 288, 299, 302, 310, 313, 321, 324, 335, 344, 346, 355, 357, + 366, 377, 380, 388, 391, 399, 402, 413, 422, 424, 432, 443, 446, 454, + 457, 465, 468, 479, 488, 490, 499, 501, 510, 521, 524, 532, 535, 543, + 546, 557, 566, 568, 578, 587, 589, 598, 600, 609, 620, 623, 631, 634, + 642, 645, 656, 665, 667, 676, 678, 687, 698, 701, 709, 712, 718, 720, + 729, 740, 743, 751, 754, 762, 765, 776, 785, 803, 806, 817, 821, 823, + 832, 843, 846, 854 }; + int set10[] = { 23, 34, 45, 56, 67, 78, 99, 110, 121, 144, 155, 166, 177, + 188, 199, 222, 233, 244, 255, 267, 290, 301, 312, 323, 334, 345, 368, + 379, 390, 401, 412, 423, 434, 445, 456, 467, 478, 489, 512, 523, 534, + 545, 556, 567, 577, 588, 611, 622, 633, 644, 655, 666, 689, 700, 711, + 731, 742, 753, 764, 775, 805, 816, 834, 845, 856, 857, 858 }; + int set11[] = { 11, 22, 33, 44, 55, 66, 89, 98, 109, 132, 143, 154, 165, + 176, 187, 210, 221, 232, 243, 254, 278, 289, 300, 311, 322, 333, 356, + 367, 378, 389, 400, 411, 433, 444, 455, 466, 477, 500, 511, 522, 533, + 544, 555, 576, 599, 610, 621, 632, 643, 654, 677, 688, 699, 710, 719, + 730, 741, 752, 763, 774, 804, 815, 822, 833, 844, 855 }; + int set12[] = { 857, 858 }; + + int result[] 
= {}; + + OBSDocIdSet ps0 = new OBSDocIdSet(1000); + for (int i = 0; i < set0.length; i++) + ps0.addDoc(set0[i]); + + OBSDocIdSet ps1 = new OBSDocIdSet(1000); + for (int i = 0; i < set1.length; i++) + ps1.addDoc(set1[i]); + + OBSDocIdSet ps2 = new OBSDocIdSet(1000); + for (int i = 0; i < set2.length; i++) + ps2.addDoc(set2[i]); + + OBSDocIdSet ps3 = new OBSDocIdSet(1000); + for (int i = 0; i < set3.length; i++) + ps3.addDoc(set3[i]); + + OBSDocIdSet ps4 = new OBSDocIdSet(1000); + for (int i = 0; i < set4.length; i++) + ps4.addDoc(set4[i]); + + OBSDocIdSet ps5 = new OBSDocIdSet(1000); + for (int i = 0; i < set5.length; i++) + ps5.addDoc(set5[i]); + + OpenBitSet ps6 = new OpenBitSet(); + for (int i = 0; i < set6.length; i++) + ps6.set(set6[i]); + + OpenBitSet ps7 = new OpenBitSet(); + for (int i = 0; i < set7.length; i++) + ps7.set(set7[i]); + + OpenBitSet ps8 = new OpenBitSet(); + for (int i = 0; i < set8.length; i++) + ps8.set(set8[i]); + + P4DDocIdSet ps9 = new P4DDocIdSet(128); + for (int i = 0; i < set9.length; i++) + ps9.addDoc(set9[i]); + + OpenBitSet ps10 = new OpenBitSet(); + for (int i = 0; i < set10.length; i++) + ps10.set(set10[i]); + + OpenBitSet ps11 = new OpenBitSet(); + for (int i = 0; i < set11.length; i++) + ps11.set(set11[i]); + + ArrayList sets = new ArrayList(); + sets.add(ps0); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + sets.add(ps6); + sets.add(ps7); + sets.add(ps8); + sets.add(ps9); + sets.add(ps10); + sets.add(ps11); + + OrDocIdSet ord = new OrDocIdSet(sets); + org.apache.lucene.search.DocIdSetIterator dcit = ord.iterator(); + + int x = 0; + + int docid; + while ((docid=dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(result[x++], docid); + + OpenBitSet ps12 = new OpenBitSet(); + for (int i = 0; i < set12.length; i++) + ps12.set(set12[i]); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps12); + + AndDocIdSet andSet = new AndDocIdSet(sets2); + 
org.apache.lucene.search.DocIdSetIterator andit = andSet.iterator(); + + x = 0; + + while ((docid = andit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(set12[x++], docid); + + } + + @Test + public void testP4DDocIdSetNoExceptionCompressionRatio() + { + boolean failed = false; + System.out.println(""); + System.out.println("Running P4DeltaDocSet No Exception Compression Ratio test"); + System.out.println("----------------------------"); + + final int max = 100000; + + for(int j = 0; j < 31; j++) + { + try + { + P4DDocIdSet set = new P4DDocIdSet(batch); + long time = System.nanoTime(); + + int counter=0; + for(int c = 0; c >= 0 && counter < max; c += (1 << j)) + { + set.addDoc(c); + counter++; + } + set.optimize(); + //System.out.println("Time to construct:"+(System.nanoTime() - time)+" ns"); + System.out.println("Delta:" + (1 << j) + " numOfItems:" + counter + " Blob Size:"+set.totalBlobSize()); + } + catch(Exception ex) + { + System.out.println("Delta:" + (1 << j) + " Failed"); + failed = true; + } + } + assertFalse("compresseion failed", failed); + } +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestMultiThreadedAccess.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestMultiThreadedAccess.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/TestMultiThreadedAccess.java (revision 0) @@ -0,0 +1,170 @@ +package org.apache.lucene.kamikaze.test; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.kamikaze.docidset.api.StatefulDSIterator; +import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.junit.Test; + + + + +public class TestMultiThreadedAccess { + + + int _length = 10000; + int _max = 30000000; + 
@Test + public void testSkipPerformance() throws IOException, InterruptedException + { + System.out.println(""); + System.out.println("Running Doc Skip Multithreaded"); + System.out.println("----------------------------"); + + double booster = ((_max*1.0)/(1000f*_length)); + P4DDocIdSet set = new P4DDocIdSet(); + Random random = new Random(); + + int max = 1000; + + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + int prev = 0; + for (int i = 0; i < _length*256; i++) { + prev +=i; + list.add(prev); + } + + Collections.sort(list); + //System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + + + + //P4D + final P4DDocIdSet p4d = new P4DDocIdSet(); + int counter=0; + + for (Integer c : list) { + counter++; + //System.out.println(c); + p4d.addDoc(c); + } + System.out.println("Set Size:"+ p4d.size()); + Thread arr [] = new Thread[5]; + for(int i=0;i list = new LinkedList(); + LinkedList list2 = new LinkedList(); + int prev = 0; + for (int i = 55; i < _length*256; i++) { + prev +=i; + list.add(prev); + } + + Collections.sort(list); + //System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + + final int maxVal = list.get(list.size()-1); + + //P4D + final P4DDocIdSet p4d = new P4DDocIdSet(); + int counter=0; + + for (Integer c : list) { + counter++; + //System.out.println(c); + p4d.addDoc(c); + } + System.out.println("Set Size:"+ p4d.size()); + + Thread arr [] = new Thread[5]; + for(int i=0;i list1 = new TreeSet(); + long now = System.nanoTime(); + for (int i = 1; i < (_length); i++) + // for(int i = 1;i<2;i++) + { + + // System.out.println("Randomizer ="+randomizer); + ArrayList list = new ArrayList(); + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + Collections.sort(list); + randomizer += 1000*booster; + + for (int k = 0; k < batch; k++) { + list1.add(list.get(k)); + 
set.addDoc(list.get(k)); + } + + // System.out.println("At :" + i +" "+(randomizer-1000) +" " + + // randomizer); + } + + totalCompressionTime = System.nanoTime() - now; + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator its = set.iterator(); + int x = 0; + now = System.nanoTime(); + int i = -1; + Iterator itd = list1.iterator(); + int docid; + while ((docid=its.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS && itd.hasNext() ) { + + assertEquals(docid, itd.next().intValue()); + } + // System.out.println(its.doc()); + + totalDecompressionTime = System.nanoTime() - now; + + + + } + + @Test + public void testOBSDocIdSetSkipSanity() { + double booster = ((_max*1.0)/(1000f*_length)); + System.out.println(""); + System.out.println("Running OBSDocIDSet Skip Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + + OBSDocIdSet set = new OBSDocIdSet(_max); + int randomizer = 0; + double totalDecompressionTime = 0; + TreeSet list = new TreeSet(); + ArrayList list2 = new ArrayList(); + + for (int i = 1; i < _length + 1; i++) { + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + } + + //Collections.sort(list); + int counter=0; + for (Integer c : list) { + counter++; + set.addDoc(c); + } + + + list2.addAll(list); + // Measure time to obtain iterator + long time = System.nanoTime(); + set.optimize(); + System.out.println("Time to optimize set of " + counter + " numbers : "+ (System.nanoTime() - time)+"ns"); + System.out.println("Size in Bytes:"+set.sizeInBytes()); + StatefulDSIterator dcit = set.iterator(); + + + long now = System.nanoTime(); + + for (int i = 0; i < _max; i += 600) { + try { + + int docid = dcit.advance(i); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) { + + 
//System.out.println(dcit.doc()+":"+list2.get(dcit.getCursor())+":"+dcit.getCursor()); + assertEquals(docid, list2.get(dcit.getCursor()).intValue()); + + docid = dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + { + //System.out.println(dcit.doc()+":"+list2.get(dcit.getCursor())+":"+dcit.getCursor()); + assertEquals(docid, list2.get(dcit.getCursor()).intValue()); + } + docid = dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + { + //System.out.println(dcit.doc()+":"+list2.get(dcit.getCursor())+":"+dcit.getCursor()); + assertEquals(docid, list2.get(dcit.getCursor()).intValue()); + } + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + } + + @Test + public void testOBSDocIdSetPerformance() throws IOException { + double booster = ((_max*1.0)/(1000f*_length)); + System.out.println(""); + System.out.println("Running OBSDocIdSet Performance test"); + System.out.println("----------------------------"); + OBSDocIdSet set = new OBSDocIdSet(_max); + // OBSDocIdSet set = new OBSDocIdSet(1000); + + Random random = new Random(); + + // Minimum 5 bits + int randomizer = 0; + double totalCompressionTime = 0; + double totalDecompressionTime = 0; + double totalCompressionRatio = 0; + + long now = System.nanoTime(); + for (int i = 1; i < (_length); i++) + // for(int i = 1;i<2;i++) + { + + // System.out.println("Randomizer ="+randomizer); + ArrayList list = new ArrayList(); + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + Collections.sort(list); + randomizer += 1000*booster; + + for (int k = 0; k < batch; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("At :" + i +" "+(randomizer-1000) +" " + + // randomizer); + } + + totalCompressionTime = System.nanoTime() - now; + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator its = set.iterator(); + int x = 0; + now 
= System.nanoTime(); + int docid; + while ((docid=its.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + + x = docid; + // System.out.println(its.doc()); + + } + totalDecompressionTime = System.nanoTime() - now; + set.optimize(); + System.out.println("Total decompression time :" + totalDecompressionTime + + ": for " + ((double) batch * _length) / 1000000 + " M numbers"); + + System.out.println("Compression Ratio:" + ((double) (_max)) + /(batch*_length*32) + " for max=" + _max); + + } + + + @Test + public void testIntArrayDocIdSetSkipSanity() { + + System.out.println(""); + System.out.println("Running IntArrayDocIdSet Skip Sanity test"); + System.out.println("----------------------------"); + int size = batch * _length; + IntArrayDocIdSet set = new IntArrayDocIdSet(size); + + Random random = new Random(); + ArrayList list = new ArrayList(); + ArrayList list2 = new ArrayList(); + + long now = System.nanoTime(); + for (int i = 0; i < size; i++) { + list.add((int) (i * 100 + random.nextDouble() * 1000)); + } + + Collections.sort(list); + + for (int k = 0; k < size; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator dcit = set.iterator(); + for (int i = 0; i < _max; i += 60) { + try { + int docid = dcit.advance(i); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) { + + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + docid = dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + docid = dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + } + } catch (Exception e) { + fail(e.getMessage()); + } + } + + } + + + + @Test + public void testIntArrayDocIdSetIterateSanity() { + System.out.println(""); + System.out.println("Running IntArrayDocIdSet Iterate sanity test"); + 
System.out.println("----------------------------"); + int size = batch * _length; + IntArrayDocIdSet set = new IntArrayDocIdSet(_length); + // OBSDocIdSet set = new OBSDocIdSet(1000); + + Random random = new Random(); + ArrayList list = new ArrayList(); + ArrayList list2 = new ArrayList(); + + long now = System.nanoTime(); + for (int i = 0; i < size; i++) { + list.add((int) (i * 100 + random.nextDouble() * 1000)); + } + + Collections.sort(list); + + for (int k = 0; k < size; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator dcit = set.iterator(); + int x = 0; + now = System.nanoTime(); + try { + int docid; + while ((docid=dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + list2.add(docid); + // System.out.println(dcit.doc()); + } + } catch (IOException e) { + fail(e.getMessage()); + } + + for (int i = 0; i < list.size(); i++) { + assertEquals(list.get(i).intValue(), list2.get(i).intValue()); + + } + + } + + + @Test + public void testIntArrayDocIdSetIteratePerformance() { + System.out.println(""); + System.out.println("Running IntArrayDocIdSet Iterate Performance test"); + System.out.println("----------------------------"); + int size = batch * _length; + IntArrayDocIdSet set = new IntArrayDocIdSet(_length); + // OBSDocIdSet set = new OBSDocIdSet(1000); + + Random random = new Random(); + ArrayList list = new ArrayList(); + + long now = System.nanoTime(); + for (int i = 0; i < size; i++) { + list.add((int) (i * 100 + random.nextDouble() * 1000)); + } + + Collections.sort(list); + + for (int k = 0; k < size; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator dcit = set.iterator(); + int x = 0; + now = System.nanoTime(); + try { + int docid; + while ((docid=dcit.nextDoc())!= 
DocIdSetIterator.NO_MORE_DOCS) { + x = docid; + } + } catch (IOException e) { + fail(e.getMessage()); + } + + double totalDecompressionTime = System.nanoTime() - now; + System.out.println("Decompression time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * _length) + / 1000000 + " M numbers"); + + + } + + @Test + public void testP4DDocIdSetIteratePerformance() { + double booster = ((_max*1.0)/(1000f*_length)); + + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println(""); + System.out.println("Running P4DeltaDocSet Iterate Performance test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + int val = 0 ; + for (int i = 1; i < _length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + val = randomizer + (int) (random.nextDouble() * 1000); + list.add(val); + + } + + randomizer += 1000*booster; + } + + Collections.sort(list); + System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + + for (Integer c : list) { + set.addDoc(c); + } + set.optimize(); + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + // int x = -1; + try { + while (dcit.nextDoc()!=DocIdSetIterator.NO_MORE_DOCS) { + + } + } catch (IOException e1) { + fail(e1.getMessage()); + } + totalDecompressionTime = System.nanoTime() - now; + System.out.println("Decompression time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * _length) + / 1000000 + " M numbers"); + System.out.println("Compression Ratio : "+ ((double)set.sizeInBytes())/(batch * _length * 4)); + } + + + @Test + public void testP4DDocIdSetNonBoundarySkipSanity() { + double booster = ((_max*1.0)/(1000f*_length)); + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println(""); + 
System.out.println("Running P4DeltaDocSet Non-Boundary skip test"); + System.out.println("----------------------------"); + Random random = new Random(); + int extra = 35; + int length = 1000; + if (_length > 100) + length = _length / 100; + + int size = batch * length; + int randomizer = 0; + double totalDecompressionTime = 0; + + List list = new LinkedList(); + + for (int i = 1; i < _length + 1; i++) { + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + } + + randomizer += 1000*booster; + for (int i = 0; i < extra; i++) + list.add(randomizer + (int) (random.nextDouble() * 1000)); + int counter = 0; + + Collections.sort(list); + System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + // System.out.println(list); + for (Integer c : list) + { + counter++; + set.addDoc(c); + } + + // Measure time to obtain iterator + long time = System.nanoTime(); + set.optimize(); + System.out.println("Time to optimize set of " + counter + " numbers : "+ (System.nanoTime() - time)+"ns"); + System.out.println("Size in Bytes:"+set.sizeInBytes()); + + StatefulDSIterator dcit = set.iterator(); + + + for (int i = 0; i < size; i += 60) { + try { + int docid=dcit.advance(i); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) { + + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + docid=dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + docid=dcit.nextDoc(); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + } + } catch (Exception e) { + e.printStackTrace(); + fail(e.getMessage()); + } + } + + } + + @Test + public void testP4DDocIdSetNonBoundaryCompressionSanity() throws IOException { + int extra = 34; + double booster = ((_max*1.0)/(1000f*_length)); + int counter = 0; + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println(""); + 
System.out.println("Running P4DeltaDocSet Non-Boundary Compression Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int size = _length; + if (_length > 100) + size = _length / 100; + + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + + for (int i = 1; i < size + 1; i++) { + for (int k = 0; k < batch; k++) { + counter++; + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + } + + randomizer += 1000; + + for (int i = 0; i < extra; i++) + { + counter++; + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + Collections.sort(list); + + long time = System.nanoTime(); + + + for (Integer c : list) { + + set.addDoc(c); + } + + + // Measure time to obtain iterator + time = System.nanoTime(); + set.optimize(); + + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + int i = 0; + int docid; + while ((docid = dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) { + assertEquals(list.get(i++).intValue(), docid); + } + + } + + @Test + public void testP4DDocIdSetSkipSanity() { + double booster = ((_max*1.0)/(1000f*_length)); + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println(""); + System.out.println("Running P4DeltaDocSet Skip Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + + int max = 1000; + + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < _length + 1; i++) { + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + } + + Collections.sort(list); + System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + long time = System.nanoTime(); + + int counter=0; + for (Integer c : list) { + counter++; + set.addDoc(c); + } 
+ System.out.println("Time to construct:"+(System.nanoTime() - time)+" ns"); + + // Measure time to obtain iterator + time = System.nanoTime(); + set.optimize(); + System.out.println("Time to optimize set of " + counter + " numbers : "+ (System.nanoTime() - time)+"ns"); + System.out.println("Size in Bytes:"+set.sizeInBytes()); + StatefulDSIterator dcit = set.iterator(); + + + long now = System.nanoTime(); + + for (int i = 0; i < max; i += 600) { + try { + + int docid = dcit.advance(i); + if (docid!=DocIdSetIterator.NO_MORE_DOCS) { + + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + if ((docid = dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + if ((docid = dcit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) + assertEquals(docid, list.get(dcit.getCursor()).intValue()); + } + } catch (Exception e) { + fail(e.getMessage()); + } + } + + } + + + @Test + public void testSkipPerformance() throws IOException + { + System.out.println(""); + System.out.println("Running Doc Skip Performance"); + System.out.println("----------------------------"); + + double booster = ((_max*1.0)/(1000f*_length)); + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println(""); + System.out.println("Running P4DeltaDocSet Skip Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + + int max = 1000; + + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < _length + 1; i++) { + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + } + + Collections.sort(list); + //System.out.println("Largest Element in the List:"+list.get( list.size() -1 )); + + + + //P4D + P4DDocIdSet p4d = new P4DDocIdSet(); + int counter=0; + + for (Integer c : list) { + counter++; + p4d.addDoc(c); + } + StatefulDSIterator dcit 
= p4d.iterator(); + _testSkipPerformance(list.get(list.size()-1),dcit); + + // Int Array + IntArrayDocIdSet iSet = new IntArrayDocIdSet(list.size()); + counter=0; + + for (Integer c : list) { + counter++; + p4d.addDoc(c); + } + dcit = iSet.iterator(); + _testSkipPerformance(list.get(list.size()-1),dcit); + + // OBS + OBSDocIdSet oSet = new OBSDocIdSet(list.size()); + counter=0; + + for (Integer c : list) { + counter++; + p4d.addDoc(c); + } + dcit = oSet.iterator(); + _testSkipPerformance(list.get(list.size()-1),dcit); + + } + + + + private void _testSkipPerformance(int max, StatefulDSIterator dcit) throws IOException { + + + long now = System.nanoTime(); + int ctr = 0; + for(int i=0;i docSets = new ArrayList(); + Random random = new Random(); + // Minimum 5 bits + int randomizer = 0; + + for (int j = 0; j < all; j++) { + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < size + 1; i++) { + + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000*booster; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + docSets.add(docSet); + + } + System.out.println("Constructed component DocSets"); + org.apache.lucene.search.DocIdSetIterator oit = new AndDocIdSet(docSets).iterator(); + long now = System.nanoTime(); + try { + int docid; + while ((docid = oit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + fail(e.getMessage()); + } + + totalDecompressionTime = System.nanoTime() - now; + System.out.println("Total decompression time :" + totalDecompressionTime + + ": for " + ((double) batch * size) / 1000000 + " M numbers"); + + } + + + @Test + @Ignore + public void testNotDocIdSet() throws IOException { + System.out.println(""); + System.out.println("Running NotDocIdSet test"); + 
System.out.println("----------------------------"); + int max = 1000; + + if (_max > 1000) + max = _max / 1000; + + int length = 100; + + if (_length > 100) + length = _length / 100; + Random random = new Random(); + + int randomizer = 0; + int b = 0; + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + b = randomizer + (int) (random.nextDouble() * 1000); + intSet.add(b); + + } + + randomizer += 1000; + Collections.sort(intSet); + + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + + org.apache.lucene.search.DocIdSetIterator oit = new NotDocIdSet(docSet, max).iterator(); + + int docid; + while ((docid = oit.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + assertFalse(intSet.contains(docid)); + } + + } + +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/BooleanDocIdSetTest.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/BooleanDocIdSetTest.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/BooleanDocIdSetTest.java (revision 0) @@ -0,0 +1,60 @@ +package org.apache.lucene.kamikaze.test.perf; + +import java.util.Arrays; + +import org.apache.lucene.kamikaze.docidset.impl.AndDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.IntArrayDocIdSet; +import org.apache.lucene.kamikaze.docidset.impl.OrDocIdSet; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + + +public class BooleanDocIdSetTest { + private static DocIdSet[] DocList; + static + { + DocList = new DocIdSet[5]; + int maxdoc = 1000000; + for (int i=0;i list = new ArrayList(); + ArrayList list2 = new ArrayList(); + + long now = System.nanoTime(); + for (int i = 0; i < size; i++) { + list.add((int) (i * 100 + random.nextDouble() * 1000)); + } + + 
Collections.sort(list); + + for (int k = 0; k < size; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator dcit = set.iterator(); + int x = 0; + now = System.nanoTime(); + while (dcit.next()) { + list2.add(dcit.doc()); + // System.out.println(dcit.doc()); + } + + for (int i = 0; i < list.size(); i++) { + if (list.get(i).intValue() != list2.get(i).intValue()) + System.err.println("Expected:" + list.get(i) + " but was:" + + list2.get(i) + " at index :" + i); + } + + } + + private static void testIntArrayDocIdSetSkipSanity(int size) + throws IOException { + System.out.println("Running IntArrayDocIdSet Skip Sanity test"); + System.out.println("----------------------------"); + IntArrayDocIdSet set = new IntArrayDocIdSet(size); + + Random random = new Random(); + ArrayList list = new ArrayList(); + ArrayList list2 = new ArrayList(); + + long now = System.nanoTime(); + for (int i = 0; i < size; i++) { + list.add((int) (i * 100 + random.nextDouble() * 1000)); + } + + Collections.sort(list); + + for (int k = 0; k < size; k++) { + set.addDoc(list.get(k)); + } + + // System.out.println("Total compression time :"+totalCompressionTime+": + // for"+((double)batch*length)/1000000+" M numbers"); + StatefulDSIterator dcit = set.iterator(); + for (int i = 0; i < 200000; i += 60) { + try { + + if (dcit.skipTo(i)) { + // System.out.println("Target:"+i+" Found:"+dcit.doc()); + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("1." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("2." + dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("3." 
+ dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + + } + // else + // System.out.println("Number out of range"); + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + + } + + private static void testSimpleArraySet(int batch, int length) { + System.out.println("Running No Alloc Integer Array Set test"); + System.out.println("----------------------------"); + + int randomizer = 0; + Random random = new Random(); + int[] source = new int[batch * length]; + + for (int i = 0; i < (batch * length); i++) { + source[i] = randomizer + (int) (random.nextDouble() * 1000); + } + + long now = System.nanoTime(); + + for (int i = 0; i < batch * length; i++) + randomizer = source[i]; + + System.out.println("Total decompression time :" + + ((double) System.nanoTime() - now) + ": for " + + ((double) batch * length) / 1000000 + " M numbers"); + + } +/* + private static void testP4DCompressed(int batch, int length) + throws IOException { + P4DSetNoBase set = new P4DSetNoBase(); + System.out.println("Running P4Delta Compressed set test"); + System.out.println("----------------------------"); + Random random = new Random(); + OpenBitSet compressedSet = null; + long now = System.nanoTime(); + int vals[] = null; + int[] input = new int[batch]; + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int exceptionOver = 12; + int base = 0; + int randomizer = 0; + double totalCompressionTime = 0; + double totalDecompressionTime = 0; + double totalCompressionRatio = 0; + + for (int i = 1; i < length + 1; i++) { + + // System.out.println("Randomizer ="+randomizer); + ArrayList list = new ArrayList(); + int bVal[] = new int[33]; + + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + // list.add(newTest[k]); + } + Collections.sort(list); + randomizer += 1000; + input[0] = list.get(0); + for (int j = batch - 1; j > 0; j--) { + try { + 
input[j] = list.get(j) - list.get(j - 1); + if (input[j] == 0) + bVal[1]++; + else + bVal[(int) (Math.log(input[j]) / logBase2) + 1]++; + + } catch (ArrayIndexOutOfBoundsException w) { + System.out.println(j); + } + + } + + base = input[0]; + input[0] = 0; + + // formulate b value + for (int k = 32; k > 4; k--) { + exceptionCount += bVal[k]; + if (exceptionCount > exceptionOver) { + b = k; + exceptionCount -= bVal[k]; + break; + } + } + + b += 1; + set.setParam(base, b, batch, exceptionCount); + + compressedSet = set.compress(input); + totalCompressionTime += (System.nanoTime() - now); + // System.out.println("Time to compress:"+ (System.nanoTime() - now )+ " + // nanos.."); + + now = System.nanoTime(); + + long nowMillis = System.currentTimeMillis(); + // vals = set.decompress(compressedSet); + int lastVal = base; + for (int l = 0; l < batch; l++) { + lastVal += set.get(compressedSet, l); + } + + // System.out.println("Time to decompress:"+ (System.nanoTime() - now )+ " + // nanos.."); + totalDecompressionTime += (System.nanoTime() - now); + totalCompressionRatio += (double) compressedSet.size() / (batch * 32);*/ + + // System.out.println("Average Compression Time after:"+i+" iterations="); + // writer.write(randomizer-1000+" " + randomizer+ " " + // +totalCompressionTime/i+" "+totalDecompressionTime/i+" + // "+totalCompressionRatio/i+"\n"); + // System.out.println(randomizer-1000+" " + randomizer+ " " + // +totalCompressionTime/i+" "+totalDecompressionTime/i+" + // "+totalCompressionRatio/i); + + /* + * lastVal = base; for(int l=0;l list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + randomizer += 1000; + for (int i = 0; i < extra; i++) + list.add(randomizer + (int) (random.nextDouble() * 1000)); + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) 
+ set.addDoc(c); + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + // int x = -1; + while (dcit.next()) { + list2.add(dcit.doc()); + // dcit.doc(); + } + + System.out.println(list); + System.out.println(list2); + for (int i = 0; i < list.size(); i++) { + if (list.get(i).intValue() != list2.get(i).intValue()) + System.err.println("Expected:" + list.get(i) + " but was:" + + list2.get(i) + " at index :" + i); + } + System.out.println("Verified.."); + } + + private static void testP4DDocIdSetNonBoundarySkipSanity(int batch, + int length, int extra) throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet Non-Boundary skip test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + randomizer += 1000; + for (int i = 0; i < extra; i++) + list.add(randomizer + (int) (random.nextDouble() * 1000)); + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) + set.addDoc(c); + + StatefulDSIterator dcit = set.iterator(); + System.out.println(list); + for (int i = 0; i < 200000; i += 60) { + try { + + if (dcit.skipTo(i)) { + System.out.println("Target:" + i + " Found:" + dcit.doc()); + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("1." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("2." 
+ dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("3." + dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + + } + // else + // System.out.println("Number out of range"); + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + + } + + private static void testP4DDocIdSetIteratePerf(int batch, int length) + throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet Iteration Performance test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) { + set.addDoc(c); + } + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + // int x = -1; + while (dcit.next()) { + // list2.add(dcit.doc ()); + dcit.doc(); + } + totalDecompressionTime = System.nanoTime() - now; + System.out.println("Decompression time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * length) + / 1000000 + " M numbers"); + + } + + private static void testP4DDocIdSetSkipPerf(int batch, int length) + throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet Skip Perf test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double 
logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) { + set.addDoc(c); + } + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + + // Get a new iterator + dcit = set.iterator(); + + for (int i = 0; i < 2000000; i += 60) { + + if (dcit.skipTo(i)) { + dcit.doc(); + } + } + + totalDecompressionTime = System.nanoTime() - now; + + System.out.println("Skipping time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * length) + / 1000000 + " M numbers"); + + } + + private static void testP4DDocIdSet(int batch, int length) throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) { + set.addDoc(c); + } + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + // int x = -1; + while (dcit.next()) { + list2.add(dcit.doc()); + // dcit.doc(); + } + totalDecompressionTime = System.nanoTime() - now; + System.out.println("Decompression time for batch size:" + batch + " 
is " + + totalDecompressionTime + " for " + ((double) batch * length) + / 1000000 + " M numbers"); + + System.out.println(list); + System.out.println(list2); + for (int i = 0; i < list.size(); i++) { + if (list.get(i).intValue() != list2.get(i).intValue()) + System.err.println("Expected:" + list.get(i) + " but was:" + + list2.get(i) + " at index :" + i); + } + + now = System.nanoTime(); + // Get a new iterator + dcit = set.iterator(); + + for (int i = 0; i < 200000; i += 60) { + try { + + if (dcit.skipTo(i)) { + System.out.println("Target:" + i + " Found:" + dcit.doc()); + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("1." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("2." + dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("3." + dcit.doc() + ":" + dcit.getCursor() + + ":" + list.get(dcit.getCursor())); + + } + // else + // System.out.println("Number out of range"); + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + totalDecompressionTime = System.nanoTime() - now; + + System.out.println("Skipping time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * length) + / 1000000 + " M numbers"); + + } + + private static void testP4DDocIdSetSkipSanity(int batch, int length) + throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet Skip Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 
1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + Collections.sort(list); + + for (Integer c : list) { + set.addDoc(c); + } + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + + for (int i = 0; i < 200000; i += 60) { + + if (dcit.skipTo(i)) { + System.out.println("Target:" + i + " Found:" + dcit.doc()); + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("1." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("2." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + if (dcit.next()) + if (dcit.doc() != list.get(dcit.getCursor())) + System.err.println("3." + dcit.doc() + ":" + dcit.getCursor() + ":" + + list.get(dcit.getCursor())); + + } + // else + // System.out.println("Number out of range"); + + } + System.out.println("Verified skipping behavior"); + + } + + private static void testP4DDocIdSetCompressionSanity(int batch, int length) + throws IOException { + P4DDocIdSet set = new P4DDocIdSet(batch); + System.out.println("Running P4DeltaDocSet Compression Sanity test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + double totalDecompressionTime = 0; + List list = new LinkedList(); + LinkedList list2 = new LinkedList(); + + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + list.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + } + + Collections.sort(list); + // System.out.println(list); + for (Integer c : list) { + set.addDoc(c); + } + + StatefulDSIterator dcit = set.iterator(); + + long now = System.nanoTime(); + // 
int x = -1; + while (dcit.next()) { + list2.add(dcit.doc()); + // dcit.doc(); + } + totalDecompressionTime = System.nanoTime() - now; + System.out.println("Decompression time for batch size:" + batch + " is " + + totalDecompressionTime + " for " + ((double) batch * length) + / 1000000 + " M numbers"); + + for (int i = 0; i < list.size(); i++) { + if (list.get(i).intValue() != list2.get(i).intValue()) + System.err.println("Expected:" + list.get(i) + " but was:" + + list2.get(i) + " at index :" + i); + } + + } + + private static void testOrDocIdSetSkip(int batch, int length, int all) + throws IOException { + System.out.println("Running OrDocIdSet Skip test"); + System.out.println("----------------------------"); + ArrayList docSets = new ArrayList(); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + for (int j = 0; j < all; j++) { + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + docSets.add(docSet); + } + + DocIdSetIterator dcit = new OrDocIdSet(docSets).iterator(); + + for (int i = 0; i < 200000; i += 64) { + try { + + if (dcit.skipTo(i)) { + System.out.println("Target:" + i + " Found:" + dcit.doc()); + dcit.next(); + dcit.next(); + // if(dcit.doc()!=list.get(dcit.getCursor())) + // System.err.println("1."+dcit.doc()+":"+dcit.getCursor()+":"+list.get(dcit.getCursor())); + // if(dcit.next()) + // if(dcit.doc()!=list.get(dcit.getCursor())) + // System.err.println("2."+dcit.doc()+":"+dcit.getCursor()+":"+list.get(dcit.getCursor())); + // if(dcit.next()) + // if(dcit.doc()!=list.get(dcit.getCursor())) + // 
System.err.println("3."+dcit.doc()+":"+dcit.getCursor()+":"+list.get(dcit.getCursor())); + + } + // else + // System.out.println("Number out of range"); + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + + } + + private static void testOrDocIdSet(int batch, int length, int all) + throws IOException { + + System.out.println("Running OrDocIdSet test"); + System.out.println("----------------------------"); + ArrayList docSets = new ArrayList(); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + for (int j = 0; j < all; j++) { + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + System.out.println(intSet); + docSets.add(docSet); + } + + DocIdSetIterator oit = new OrDocIdSet(docSets).iterator(); + + while (oit.next()) + System.out.println(oit.doc()); + + } + + private static void testAndDocIdSet(int batch, int length, int all) + throws IOException { + System.out.println("Running AndDocIdSet test"); + System.out.println("----------------------------"); + ArrayList docSets = new ArrayList(); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + for (int j = 0; j < all; j++) { + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + 
} + for (Integer c : intSet) { + docSet.addDoc(c); + } + docSets.add(docSet); + System.out.println(intSet); + } + + DocIdSetIterator oit = new AndDocIdSet(docSets).iterator(); + + while (oit.next()) { + System.out.println(oit.doc()); + + } + + } + + private static void testNotDocIdSetSkipSanity(int batch, int length, int max) + throws IOException { + int[] set = new int[] { 7, 7, 22, 32, 62, 69, 69, 78, 84, 91, 93, 95, 109, + 111, 121, 124, 127, 130, 134, 134, 141, 154, 174, 180, 180, 186, 192, + 193, 194, 198, 239, 258, 269, 285, 307, 308, 313, 327, 329, 332, 334, + 341, 341, 361, 373, 375, 381, 390, 401, 405, 414, 426, 428, 436, 441, + 458, 464, 467, 474, 478, 481, 492, 500, 528, 530, 535, 538, 550, 559, + 568, 580, 588, 596, 597, 604, 604, 608, 613, 624, 629, 634, 648, 652, + 668, 670, 670, 670, 683, 686, 688, 693, 704, 705, 705, 707, 712, 718, + 721, 732, 743, 753, 757, 768, 776, 780, 782, 797, 800, 801, 807, 810, + 816, 826, 836, 854, 856, 858, 863, 868, 888, 889, 896, 897, 898, 899, + 900, 901, 902, 903, 904, 905, 913, 917, 946, 958, 987, 2094, 2112, + 2133, 2146, 2146, 2150, 2164, 2214, 2249, 2314, 2323, 2371, 2395, 2423, + 2426, 2472, 2486, 2527, 2561, 2565, 2569, 2584, 2693, 2710, 2715, 2802, + 2803, 2845, 2854, 2874, 2933, 2944, 2952 }; + System.out.println("Running NotDocIdSetSkip test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (int i = 0; i < intSet.size(); i++) { + // intSet.add(set[i]); + docSet.addDoc(intSet.get(i)); + } + + DocIdSetIterator dcit = new 
NotDocIdSet(docSet, max).iterator(); + + for (int i = 0; i < 200000; i += 61) { + try { + + if (dcit.skipTo(i)) { + // System.out.println("Target:"+i+" Found:"+dcit.doc()); + if (intSet.contains(dcit.doc())) { + System.err.println("Error..." + dcit.doc()); + System.out.flush(); + } + + dcit.next(); + dcit.next(); + } + + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + System.out.println("Not Skip test finished"); + + } + + private static void testNotDocIdSet(int batch, int length, int max) + throws IOException { + System.out.println("Running NotDocIdSet test"); + System.out.println("----------------------------"); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + ArrayList intSet = new ArrayList(); + P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + + DocIdSetIterator oit = new NotDocIdSet(docSet, max).iterator(); + System.out.println(intSet); + while (oit.next()) { + + // System.out.println(oit.doc()); + if (intSet.contains(oit.doc())) { + System.err.println("Error..." 
+ oit.doc()); + System.out.flush(); + } + } + System.out.println("Not Function performed"); + + } + + private static void testCombinationSetOperation(int batch, int length) + throws IOException { + System.out.println("Running AndDocIdSet Skip sanity test"); + System.out.println("----------------------------"); + int set1[] = { 8, 27, 30, 35, 53, 59, 71, 74, 87, 92, 104, 113, 122, 126, + 135, 135, 137, 138, 185, 186, 188, 192, 197, 227, 227, 230, 242, 252, + 255, 259, 267, 270, 271, 289, 298, 305, 311, 312, 325, 335, 337, 346, + 351, 360, 365, 371, 375, 380, 387, 391, 406, 407, 408, 419, 425, 430, + 443, 451, 454, 456, 464, 466, 469, 473, 478, 483, 496, 502, 517, 527, + 531, 578, 601, 605, 625, 626, 632, 638, 641, 648, 652, 653, 667, 677, + 682, 697, 700, 711, 713, 733, 764, 780, 782, 796, 798, 801, 804, 812, + 817, 831, 835, 849, 859, 872, 886, 891, 893, 895, 903, 908, 914, 915, + 916, 917, 920, 921, 926, 944, 947, 950, 956, 962, 964, 969, 979, 986, + 994, 996, 1018, 1019, 1022, 1025, 1029, 1029, 1039, 1058, 1062, 1063, + 1064, 1068, 1069, 1071, 1075, 1082, 1085, 1096, 1098, 1102, 1103, 1104, + 1104, 1119, 1120, 1122, 1122, 1123, 1147, 1149, 1179, 1183, 1195, 1197, + 1200, 1201, 1214, 1215, 1220, 1221, 1221, 1225, 1229, 1252, 1260, 1261, + 1268, 1269, 1274, 1279, 1293, 1336, 1336, 1348, 1369, 1370, 1375, 1394, + 1401, 1414, 1444, 1453, 1459, 1468, 1473, 1473, 1474, 1485, 1502, 1505, + 1506, 1517, 1518, 1520, 1521, 1522, 1528, 1537, 1543, 1549, 1550, 1560, + 1565, 1566, 1585, 1599, 1604, 1619, 1637, 1650, 1658, 1679, 1684, 1691, + 1691, 1701, 1701, 1715, 1719, 1720, 1722, 1740, 1740, 1748, 1752, 1756, + 1756, 1776, 1796, 1799, 1799, 1800, 1809, 1811, 1828, 1829, 1849, 1859, + 1865, 1868, 1886, 1900, 1933, 1955, 1959, 1983, 1985, 1999, 2003, 2003, + 2029, 2038, 2048, 2050, 2054, 2056, 2059, 2060, 2079, 2095, 2099, 2104, + 2111, 2113, 2119, 2119, 2122, 2123, 2141, 2142, 2145, 2148, 2160, 2182, + 2183, 2200, 2203, 2209, 2210, 2221, 2232, 2261, 2267, 2268, 2272, 2283, + 
2297, 2298, 2313, 2314, 2316, 2316, 2331, 2332, 2338, 2343, 2345, 2350, + 2350, 2365, 2378, 2384, 2392, 2399, 2414, 2420, 2425, 2433, 2445, 2457, + 2461, 2462, 2463, 2497, 2503, 2519, 2522, 2533, 2556, 2568, 2577, 2578, + 2578, 2585, 2589, 2603, 2603, 2613, 2616, 2648, 2651, 2662, 2666, 2667, + 2672, 2675, 2679, 2691, 2694, 2694, 2699, 2706, 2708, 2709, 2711, 2711, + 2732, 2736, 2738, 2749, 2750, 2763, 2764, 2770, 2775, 2781, 2793, 2811, + 2817, 2834, 2842, 2847, 2848, 2852, 2856, 2870, 2872, 2876, 2879, 2887, + 2897, 2903, 2980, 2984, 2994, 2997 }; + int set2[] = { 7, 21, 29, 31, 35, 37, 62, 64, 67, 72, 77, 88, 90, 96, 98, + 116, 152, 154, 156, 162, 163, 173, 179, 188, 189, 201, 203, 217, 224, + 233, 263, 267, 271, 277, 294, 301, 311, 336, 343, 349, 390, 395, 396, + 401, 407, 411, 414, 425, 432, 436, 444, 468, 476, 483, 492, 496, 497, + 501, 508, 513, 517, 519, 531, 541, 543, 552, 555, 555, 568, 571, 587, + 589, 594, 601, 604, 606, 625, 633, 634, 645, 649, 654, 655, 662, 664, + 665, 666, 671, 671, 678, 690, 693, 697, 708, 714, 723, 726, 743, 746, + 747, 772, 784, 806, 811, 812, 824, 834, 836, 844, 850, 863, 867, 890, + 890, 896, 905, 931, 933, 934, 940, 952, 959, 963, 968, 974, 978, 997, + 997, 1013, 1015, 1019, 1023, 1030, 1033, 1035, 1047, 1048, 1054, 1069, + 1087, 1147, 1156, 1158, 1165, 1175, 1199, 1211, 1224, 1252, 1255, 1256, + 1259, 1274, 1280, 1283, 1290, 1292, 1292, 1294, 1297, 1299, 1300, 1301, + 1312, 1323, 1337, 1340, 1351, 1352, 1356, 1363, 1385, 1392, 1395, 1399, + 1409, 1413, 1429, 1437, 1460, 1461, 1465, 1466, 1468, 1482, 1497, 1500, + 1501, 1508, 1517, 1524, 1524, 1529, 1530, 1538, 1538, 1544, 1545, 1552, + 1556, 1561, 1566, 1569, 1583, 1598, 1606, 1610, 1613, 1634, 1642, 1643, + 1656, 1675, 1682, 1704, 1708, 1711, 1711, 1719, 1724, 1736, 1740, 1741, + 1766, 1772, 1774, 1777, 1784, 1793, 1814, 1829, 1833, 1843, 1856, 1857, + 1870, 1874, 1879, 1884, 1886, 1890, 1901, 1909, 1912, 1940, 1944, 1946, + 1947, 1948, 1955, 1962, 1971, 1982, 1989, 1995, 
1997, 2012, 2015, 2021, + 2043, 2046, 2049, 2055, 2064, 2068, 2069, 2083, 2088, 2100, 2117, 2122, + 2126, 2132, 2143, 2148, 2152, 2152, 2153, 2159, 2173, 2176, 2198, 2198, + 2201, 2205, 2206, 2207, 2211, 2222, 2230, 2254, 2256, 2264, 2268, 2317, + 2318, 2319, 2330, 2334, 2344, 2353, 2353, 2354, 2369, 2374, 2376, 2392, + 2402, 2403, 2414, 2417, 2422, 2424, 2435, 2445, 2461, 2475, 2530, 2539, + 2541, 2542, 2565, 2566, 2571, 2572, 2577, 2579, 2581, 2582, 2586, 2592, + 2595, 2600, 2642, 2645, 2645, 2651, 2668, 2676, 2699, 2705, 2705, 2709, + 2715, 2720, 2720, 2736, 2753, 2756, 2761, 2788, 2792, 2793, 2796, 2801, + 2815, 2834, 2842, 2857, 2859, 2859, 2861, 2865, 2869, 2875, 2879, 2884, + 2885, 2895, 2901, 2906, 2912, 2935, 2940, 2957, 2958, 2967, 2969, 2976, + 2978, 2981, 2984, 2994, 2997 }; + int set3[] = { 2994, 2997 }; + + P4DDocIdSet pset1 = new P4DDocIdSet(batch); + MyOpenBitSet pset2 = new MyOpenBitSet(); + P4DDocIdSet pset3 = new P4DDocIdSet(batch); + + for (int i = 0; i < set1.length; i++) { + pset1.addDoc(set1[i]); + pset2.set(set2[i]); + + } + for (int i = 0; i < set3.length; i++) { + pset3.addDoc(set3[i]); + } + + ArrayList orDocs = new ArrayList(); + orDocs.add(pset1); + orDocs.add(pset2); + + List its = new ArrayList(); + its.add(new OrDocIdSet(orDocs)); + its.add(pset3); + + AndDocIdSet andSet = new AndDocIdSet(its); + DocIdSetIterator dcit = andSet.iterator(); + while (dcit.next()) + System.out.println(dcit.doc()); + + } + + private static void testAndDocIdSetSkip(int batch, int length, int all) + throws IOException { + System.out.println("Running AndDocIdSet Skip test"); + System.out.println("----------------------------"); + // FileWriter writer = new FileWriter("/Users/abhasin/TestOps.txt"); + + ArrayList docSets = new ArrayList(); + Random random = new Random(); + // Minimum 5 bits + int b = 5; + int exceptionCount = 0; + double logBase2 = Math.log(2); + int randomizer = 0; + + for (int j = 0; j < all; j++) { + ArrayList intSet = new ArrayList(); + 
P4DDocIdSet docSet = new P4DDocIdSet(batch); + randomizer = 0; + for (int i = 1; i < length + 1; i++) { + + int bVal[] = new int[33]; + for (int k = 0; k < batch; k++) { + intSet.add(randomizer + (int) (random.nextDouble() * 1000)); + } + + randomizer += 1000; + Collections.sort(intSet); + + } + for (Integer c : intSet) { + docSet.addDoc(c); + } + // writer.write(intSet.toString()); + // writer.write("\n"); + // System.out.println(intSet); + + docSets.add(docSet); + } + // writer.flush(); + // writer.close(); + DocIdSetIterator dcit = new AndDocIdSet(docSets).iterator(); + + for (int i = 0; i < 200000; i += 64) { + try { + + if (dcit.skipTo(i)) { + // System.out.println("Target:"+i+" Found:"+dcit.doc()); + System.out.print(dcit.doc() + ","); + dcit.next(); + dcit.next(); + + } + + } catch (Exception e) { + e.printStackTrace(); + System.out.flush(); + System.exit(1); + } + } + + } + + private static void testOrDocIdSetSanity() throws IOException { + + System.out.println("Running AndDocIdSet Sanity test"); + System.out.println("----------------------------"); + + int set1[] = { 8, 27, 30, 35, 53, 59, 71, 74, 87, 92, 104, 113, 122, 126, + 135, 135, 137, 138, 185, 186, 188, 192, 197, 227, 227, 230, 242, 252, + 255, 259, 267, 270, 271, 289, 298, 305, 311, 312, 325, 335, 337, 346, + 351, 360, 365, 371, 375, 380, 387, 391, 406, 407, 408, 419, 425, 430, + 443, 451, 454, 456, 464, 466, 469, 473, 478, 483, 496, 502, 517, 527, + 531, 578, 601, 605, 625, 626, 632, 638, 641, 648, 652, 653, 667, 677, + 682, 697, 700, 711, 713, 733, 764, 780, 782, 796, 798, 801, 804, 812, + 817, 831, 835, 849, 859, 872, 886, 891, 893, 895, 903, 908, 914, 915, + 916, 917, 920, 921, 926, 944, 947, 950, 956, 962, 964, 969, 979, 986, + 994, 996, 1018, 1019, 1022, 1025, 1029, 1029, 1039, 1058, 1062, 1063, + 1064, 1068, 1069, 1071, 1075, 1082, 1085, 1096, 1098, 1102, 1103, 1104, + 1104, 1119, 1120, 1122, 1122, 1123, 1147, 1149, 1179, 1183, 1195, 1197, + 1200, 1201, 1214, 1215, 1220, 1221, 1221, 1225, 
1229, 1252, 1260, 1261, + 1268, 1269, 1274, 1279, 1293, 1336, 1336, 1348, 1369, 1370, 1375, 1394, + 1401, 1414, 1444, 1453, 1459, 1468, 1473, 1473, 1474, 1485, 1502, 1505, + 1506, 1517, 1518, 1520, 1521, 1522, 1528, 1537, 1543, 1549, 1550, 1560, + 1565, 1566, 1585, 1599, 1604, 1619, 1637, 1650, 1658, 1679, 1684, 1691, + 1691, 1701, 1701, 1715, 1719, 1720, 1722, 1740, 1740, 1748, 1752, 1756, + 1756, 1776, 1796, 1799, 1799, 1800, 1809, 1811, 1828, 1829, 1849, 1859, + 1865, 1868, 1886, 1900, 1933, 1955, 1959, 1983, 1985, 1999, 2003, 2003, + 2029, 2038, 2048, 2050, 2054, 2056, 2059, 2060, 2079, 2095, 2099, 2104, + 2111, 2113, 2119, 2119, 2122, 2123, 2141, 2142, 2145, 2148, 2160, 2182, + 2183, 2200, 2203, 2209, 2210, 2221, 2232, 2261, 2267, 2268, 2272, 2283, + 2297, 2298, 2313, 2314, 2316, 2316, 2331, 2332, 2338, 2343, 2345, 2350, + 2350, 2365, 2378, 2384, 2392, 2399, 2414, 2420, 2425, 2433, 2445, 2457, + 2461, 2462, 2463, 2497, 2503, 2519, 2522, 2533, 2556, 2568, 2577, 2578, + 2578, 2585, 2589, 2603, 2603, 2613, 2616, 2648, 2651, 2662, 2666, 2667, + 2672, 2675, 2679, 2691, 2694, 2694, 2699, 2706, 2708, 2709, 2711, 2711, + 2732, 2736, 2738, 2749, 2750, 2763, 2764, 2770, 2775, 2781, 2793, 2811, + 2817, 2834, 2842, 2847, 2848, 2852, 2856, 2870, 2872, 2876, 2879, 2887, + 2897, 2903, 2980, 2984, 2986, 2997 }; + int set2[] = { 7, 21, 29, 31, 35, 37, 62, 64, 67, 72, 77, 88, 90, 96, 98, + 116, 152, 154, 156, 162, 163, 173, 179, 188, 189, 201, 203, 217, 224, + 233, 263, 267, 271, 277, 294, 301, 311, 336, 343, 349, 390, 395, 396, + 401, 407, 411, 414, 425, 432, 436, 444, 468, 476, 483, 492, 496, 497, + 501, 508, 513, 517, 519, 531, 541, 543, 552, 555, 555, 568, 571, 587, + 589, 594, 601, 604, 606, 625, 633, 634, 645, 649, 654, 655, 662, 664, + 665, 666, 671, 671, 678, 690, 693, 697, 708, 714, 723, 726, 743, 746, + 747, 772, 784, 806, 811, 812, 824, 834, 836, 844, 850, 863, 867, 890, + 890, 896, 905, 931, 933, 934, 940, 952, 959, 963, 968, 974, 978, 997, + 997, 1013, 1015, 1019, 
1023, 1030, 1033, 1035, 1047, 1048, 1054, 1069, + 1087, 1147, 1156, 1158, 1165, 1175, 1199, 1211, 1224, 1252, 1255, 1256, + 1259, 1274, 1280, 1283, 1290, 1292, 1292, 1294, 1297, 1299, 1300, 1301, + 1312, 1323, 1337, 1340, 1351, 1352, 1356, 1363, 1385, 1392, 1395, 1399, + 1409, 1413, 1429, 1437, 1460, 1461, 1465, 1466, 1468, 1482, 1497, 1500, + 1501, 1508, 1517, 1524, 1524, 1529, 1530, 1538, 1538, 1544, 1545, 1552, + 1556, 1561, 1566, 1569, 1583, 1598, 1606, 1610, 1613, 1634, 1642, 1643, + 1656, 1675, 1682, 1704, 1708, 1711, 1711, 1719, 1724, 1736, 1740, 1741, + 1766, 1772, 1774, 1777, 1784, 1793, 1814, 1829, 1833, 1843, 1856, 1857, + 1870, 1874, 1879, 1884, 1886, 1890, 1901, 1909, 1912, 1940, 1944, 1946, + 1947, 1948, 1955, 1962, 1971, 1982, 1989, 1995, 1997, 2012, 2015, 2021, + 2043, 2046, 2049, 2055, 2064, 2068, 2069, 2083, 2088, 2100, 2117, 2122, + 2126, 2132, 2143, 2148, 2152, 2152, 2153, 2159, 2173, 2176, 2198, 2198, + 2201, 2205, 2206, 2207, 2211, 2222, 2230, 2254, 2256, 2264, 2268, 2317, + 2318, 2319, 2330, 2334, 2344, 2353, 2353, 2354, 2369, 2374, 2376, 2392, + 2402, 2403, 2414, 2417, 2422, 2424, 2435, 2445, 2461, 2475, 2530, 2539, + 2541, 2542, 2565, 2566, 2571, 2572, 2577, 2579, 2581, 2582, 2586, 2592, + 2595, 2600, 2642, 2645, 2645, 2651, 2668, 2676, 2699, 2705, 2705, 2709, + 2715, 2720, 2720, 2736, 2753, 2756, 2761, 2788, 2792, 2793, 2796, 2801, + 2815, 2834, 2842, 2857, 2859, 2859, 2861, 2865, 2869, 2875, 2879, 2884, + 2885, 2895, 2901, 2906, 2912, 2935, 2940, 2957, 2958, 2967, 2969, 2976, + 2978, 2981, 2984, 2990, 2991 }; + int set3[] = { 16, 22, 56, 70, 70, 86, 88, 102, 104, 108, 112, 124, 130, + 130, 143, 156, 162, 174, 177, 182, 183, 186, 197, 206, 217, 224, 234, + 238, 242, 245, 246, 259, 275, 278, 288, 289, 295, 301, 313, 316, 358, + 361, 366, 379, 386, 405, 432, 446, 457, 460, 467, 473, 474, 475, 475, + 491, 516, 539, 540, 551, 568, 579, 588, 593, 594, 598, 607, 619, 625, + 634, 636, 649, 670, 671, 677, 682, 690, 693, 713, 718, 723, 724, 730, + 
732, 738, 746, 774, 777, 778, 811, 812, 821, 825, 825, 828, 837, 840, + 841, 847, 859, 863, 877, 878, 880, 880, 898, 901, 901, 909, 926, 931, + 931, 932, 941, 957, 961, 964, 980, 981, 982, 984, 989, 993, 996, 998, + 998, 999, 999, 1004, 1006, 1006, 1012, 1013, 1016, 1047, 1050, 1068, + 1069, 1076, 1076, 1080, 1081, 1088, 1092, 1105, 1106, 1109, 1110, 1111, + 1128, 1136, 1137, 1138, 1144, 1144, 1145, 1149, 1152, 1161, 1162, 1163, + 1171, 1177, 1178, 1201, 1204, 1252, 1253, 1263, 1266, 1275, 1279, 1290, + 1303, 1313, 1314, 1314, 1324, 1326, 1326, 1336, 1343, 1346, 1358, 1366, + 1376, 1426, 1439, 1441, 1445, 1456, 1460, 1460, 1463, 1464, 1466, 1467, + 1473, 1481, 1482, 1485, 1487, 1497, 1498, 1523, 1550, 1558, 1568, 1574, + 1581, 1585, 1591, 1592, 1606, 1611, 1619, 1622, 1634, 1636, 1644, 1648, + 1658, 1684, 1685, 1686, 1702, 1711, 1717, 1730, 1747, 1762, 1763, 1766, + 1812, 1826, 1835, 1851, 1855, 1858, 1864, 1865, 1867, 1881, 1886, 1933, + 1937, 1943, 1954, 1966, 1972, 1976, 1980, 1985, 1986, 1991, 1996, 2001, + 2019, 2026, 2032, 2041, 2061, 2069, 2077, 2078, 2082, 2083, 2089, 2098, + 2107, 2114, 2142, 2157, 2159, 2171, 2186, 2189, 2199, 2200, 2201, 2207, + 2212, 2219, 2221, 2236, 2243, 2251, 2256, 2260, 2265, 2265, 2275, 2277, + 2281, 2300, 2308, 2311, 2321, 2325, 2334, 2341, 2346, 2371, 2379, 2380, + 2383, 2397, 2399, 2401, 2404, 2407, 2411, 2450, 2482, 2499, 2505, 2514, + 2531, 2538, 2542, 2544, 2552, 2554, 2557, 2557, 2559, 2561, 2583, 2586, + 2600, 2620, 2622, 2625, 2626, 2632, 2641, 2649, 2649, 2649, 2658, 2661, + 2668, 2675, 2676, 2681, 2692, 2698, 2712, 2716, 2719, 2737, 2764, 2780, + 2781, 2790, 2791, 2793, 2801, 2802, 2807, 2809, 2814, 2815, 2855, 2855, + 2863, 2870, 2878, 2889, 2894, 2900, 2905, 2905, 2920, 2923, 2924, 2935, + 2951, 2952, 2956, 2971, 2983, 2984, 2997 }; + + // set1 = new int[]{0,2,4,6,8,10}; + // set2 = new int[]{0,1,3,5,7,10}; + // set3 = new int[]{0,1,2,4,5,10}; + + P4DDocIdSet pset1 = new P4DDocIdSet(128); + P4DDocIdSet pset2 = new 
P4DDocIdSet(128); + P4DDocIdSet pset3 = new P4DDocIdSet(128); + + for (int i = 0; i < set1.length; i++) { + pset1.addDoc(set1[i]); + pset2.addDoc(set2[i]); + pset3.addDoc(set3[i]); + } + + List its = new ArrayList(); + its.add(pset1); + its.add(pset2); + its.add(pset3); + + OrDocIdSet orSet = new OrDocIdSet(its); + DocIdSetIterator dcit = orSet.iterator(); + while (dcit.next()) + System.out.println(dcit.doc()); + + } + + private static void testAndDocIdSetSkipSanity(int batch) throws IOException { + + System.out.println("Running AndDocIdSet Skip sanity test"); + System.out.println("----------------------------"); + int set1[] = { 8, 27, 30, 35, 53, 59, 71, 74, 87, 92, 104, 113, 122, 126, + 135, 135, 137, 138, 185, 186, 188, 192, 197, 227, 227, 230, 242, 252, + 255, 259, 267, 270, 271, 289, 298, 305, 311, 312, 325, 335, 337, 346, + 351, 360, 365, 371, 375, 380, 387, 391, 406, 407, 408, 419, 425, 430, + 443, 451, 454, 456, 464, 466, 469, 473, 478, 483, 496, 502, 517, 527, + 531, 578, 601, 605, 625, 626, 632, 638, 641, 648, 652, 653, 667, 677, + 682, 697, 700, 711, 713, 733, 764, 780, 782, 796, 798, 801, 804, 812, + 817, 831, 835, 849, 859, 872, 886, 891, 893, 895, 903, 908, 914, 915, + 916, 917, 920, 921, 926, 944, 947, 950, 956, 962, 964, 969, 979, 986, + 994, 996, 1018, 1019, 1022, 1025, 1029, 1029, 1039, 1058, 1062, 1063, + 1064, 1068, 1069, 1071, 1075, 1082, 1085, 1096, 1098, 1102, 1103, 1104, + 1104, 1119, 1120, 1122, 1122, 1123, 1147, 1149, 1179, 1183, 1195, 1197, + 1200, 1201, 1214, 1215, 1220, 1221, 1221, 1225, 1229, 1252, 1260, 1261, + 1268, 1269, 1274, 1279, 1293, 1336, 1336, 1348, 1369, 1370, 1375, 1394, + 1401, 1414, 1444, 1453, 1459, 1468, 1473, 1473, 1474, 1485, 1502, 1505, + 1506, 1517, 1518, 1520, 1521, 1522, 1528, 1537, 1543, 1549, 1550, 1560, + 1565, 1566, 1585, 1599, 1604, 1619, 1637, 1650, 1658, 1679, 1684, 1691, + 1691, 1701, 1701, 1715, 1719, 1720, 1722, 1740, 1740, 1748, 1752, 1756, + 1756, 1776, 1796, 1799, 1799, 1800, 1809, 1811, 1828, 1829, 
1849, 1859, + 1865, 1868, 1886, 1900, 1933, 1955, 1959, 1983, 1985, 1999, 2003, 2003, + 2029, 2038, 2048, 2050, 2054, 2056, 2059, 2060, 2079, 2095, 2099, 2104, + 2111, 2113, 2119, 2119, 2122, 2123, 2141, 2142, 2145, 2148, 2160, 2182, + 2183, 2200, 2203, 2209, 2210, 2221, 2232, 2261, 2267, 2268, 2272, 2283, + 2297, 2298, 2313, 2314, 2316, 2316, 2331, 2332, 2338, 2343, 2345, 2350, + 2350, 2365, 2378, 2384, 2392, 2399, 2414, 2420, 2425, 2433, 2445, 2457, + 2461, 2462, 2463, 2497, 2503, 2519, 2522, 2533, 2556, 2568, 2577, 2578, + 2578, 2585, 2589, 2603, 2603, 2613, 2616, 2648, 2651, 2662, 2666, 2667, + 2672, 2675, 2679, 2691, 2694, 2694, 2699, 2706, 2708, 2709, 2711, 2711, + 2732, 2736, 2738, 2749, 2750, 2763, 2764, 2770, 2775, 2781, 2793, 2811, + 2817, 2834, 2842, 2847, 2848, 2852, 2856, 2870, 2872, 2876, 2879, 2887, + 2897, 2903, 2980, 2984, 2994, 2997 }; + int set2[] = { 7, 21, 29, 31, 35, 37, 62, 64, 67, 72, 77, 88, 90, 96, 98, + 116, 152, 154, 156, 162, 163, 173, 179, 188, 189, 201, 203, 217, 224, + 233, 263, 267, 271, 277, 294, 301, 311, 336, 343, 349, 390, 395, 396, + 401, 407, 411, 414, 425, 432, 436, 444, 468, 476, 483, 492, 496, 497, + 501, 508, 513, 517, 519, 531, 541, 543, 552, 555, 555, 568, 571, 587, + 589, 594, 601, 604, 606, 625, 633, 634, 645, 649, 654, 655, 662, 664, + 665, 666, 671, 671, 678, 690, 693, 697, 708, 714, 723, 726, 743, 746, + 747, 772, 784, 806, 811, 812, 824, 834, 836, 844, 850, 863, 867, 890, + 890, 896, 905, 931, 933, 934, 940, 952, 959, 963, 968, 974, 978, 997, + 997, 1013, 1015, 1019, 1023, 1030, 1033, 1035, 1047, 1048, 1054, 1069, + 1087, 1147, 1156, 1158, 1165, 1175, 1199, 1211, 1224, 1252, 1255, 1256, + 1259, 1274, 1280, 1283, 1290, 1292, 1292, 1294, 1297, 1299, 1300, 1301, + 1312, 1323, 1337, 1340, 1351, 1352, 1356, 1363, 1385, 1392, 1395, 1399, + 1409, 1413, 1429, 1437, 1460, 1461, 1465, 1466, 1468, 1482, 1497, 1500, + 1501, 1508, 1517, 1524, 1524, 1529, 1530, 1538, 1538, 1544, 1545, 1552, + 1556, 1561, 1566, 1569, 1583, 1598, 
1606, 1610, 1613, 1634, 1642, 1643, + 1656, 1675, 1682, 1704, 1708, 1711, 1711, 1719, 1724, 1736, 1740, 1741, + 1766, 1772, 1774, 1777, 1784, 1793, 1814, 1829, 1833, 1843, 1856, 1857, + 1870, 1874, 1879, 1884, 1886, 1890, 1901, 1909, 1912, 1940, 1944, 1946, + 1947, 1948, 1955, 1962, 1971, 1982, 1989, 1995, 1997, 2012, 2015, 2021, + 2043, 2046, 2049, 2055, 2064, 2068, 2069, 2083, 2088, 2100, 2117, 2122, + 2126, 2132, 2143, 2148, 2152, 2152, 2153, 2159, 2173, 2176, 2198, 2198, + 2201, 2205, 2206, 2207, 2211, 2222, 2230, 2254, 2256, 2264, 2268, 2317, + 2318, 2319, 2330, 2334, 2344, 2353, 2353, 2354, 2369, 2374, 2376, 2392, + 2402, 2403, 2414, 2417, 2422, 2424, 2435, 2445, 2461, 2475, 2530, 2539, + 2541, 2542, 2565, 2566, 2571, 2572, 2577, 2579, 2581, 2582, 2586, 2592, + 2595, 2600, 2642, 2645, 2645, 2651, 2668, 2676, 2699, 2705, 2705, 2709, + 2715, 2720, 2720, 2736, 2753, 2756, 2761, 2788, 2792, 2793, 2796, 2801, + 2815, 2834, 2842, 2857, 2859, 2859, 2861, 2865, 2869, 2875, 2879, 2884, + 2885, 2895, 2901, 2906, 2912, 2935, 2940, 2957, 2958, 2967, 2969, 2976, + 2978, 2981, 2984, 2994, 2997 }; + int set3[] = { 2994, 2997 }; + + P4DDocIdSet pset1 = new P4DDocIdSet(batch); + MyOpenBitSet pset2 = new MyOpenBitSet(); + P4DDocIdSet pset3 = new P4DDocIdSet(batch); + + for (int i = 0; i < set1.length; i++) { + pset1.addDoc(set1[i]); + pset2.set(set2[i]); + + } + for (int i = 0; i < set3.length; i++) { + pset3.addDoc(set3[i]); + } + + List its = new ArrayList(); + its.add(pset1); + its.add(pset2); + its.add(pset3); + AndDocIdSet andSet = new AndDocIdSet(its); + DocIdSetIterator dcit = andSet.iterator(); + while (dcit.next()) + System.out.println(dcit.doc()); + + } + + private static void testNotDocIdSetSkipSanity() { + // TODO Auto-generated method stub + } + + private static void testOrDocIdSetSkipSanity() { + // TODO Auto-generated method stub + } + + private static void testSmallSets() throws IOException { + + System.out.println("Running Small Set test"); + 
System.out.println("----------------------------"); + + MyOpenBitSet s1 = new MyOpenBitSet(); + MyOpenBitSet s2 = new MyOpenBitSet(); + MyOpenBitSet s3 = new MyOpenBitSet(); + MyOpenBitSet s4 = new MyOpenBitSet(); + + s1.set(0); + s1.set(4); + s1.set(5); + s1.set(6); + + s2.set(5); + s2.set(6); + + s3.set(1); + s3.set(5); + + ArrayList docSet = new ArrayList(); + docSet.add(s1); + docSet.add(s2); + docSet.add(s3); + + AndDocIdSet ord = new AndDocIdSet(docSet); + + for (DocIdSetIterator dcit = ord.iterator(); dcit.next();) + System.out.println(dcit.doc()); + + System.out.println("-----------"); + s1.set(5); + s2.set(5); + s3.set(5); + s4.set(5); + + AndDocIdSet ard = new AndDocIdSet(docSet); + + for (DocIdSetIterator dcit = ard.iterator(); dcit.next();) + System.out.println(dcit.doc()); + + s1.set(0); + + DocIdSetIterator nsit = new NotDocIdSet(s1, 5).iterator(); + + while (nsit.next()) + System.out.println(nsit.doc()); + + } + + public static void testCombinationSanity() throws IOException { + + int[] set1 = { 4, 19, 21, 35, 36, 43, 43, 73, 85, 104, 105, 106, 112, 118, + 119, 138, 141, 145, 146, 146, 196, 200, 202, 217, 219, 220, 221, 239, + 242, 243, 261, 276, 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, + 376, 387, 398, 401, 406, 438, 442, 450, 450, 462, 469, 475, 495, 499, + 505, 505, 513, 513, 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, + 635, 644, 646, 650, 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, + 736, 739, 746, 748, 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, + 818, 819, 831, 832, 836, 837, 837, 847, 864, 870, 872, 872, 875, 880, + 885, 899, 905, 914, 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, + 992, 998, 1000, 1031, 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, + 1100, 1100, 1107, 1109, 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, + 1176, 1194, 1200, 1209, 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, + 1252, 1277, 1309, 1322, 1325, 1327, 1327, 1329, 1341, 1341, 1342, 1352, + 1359, 1360, 1361, 1363, 1378, 1390, 1391, 
1410, 1418, 1427, 1433, 1438, + 1441, 1448, 1449, 1451, 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, + 1546, 1555, 1556, 1572, 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, + 1638, 1652, 1663, 1664, 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, + 1707, 1708, 1708, 1716, 1720, 1720, 1723, 1724, 1727, 1727, 1730, 1733, + 1735, 1738, 1750, 1755, 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, + 1848, 1854, 1871, 1888, 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set2 = { 4, 105, 141, 633, 1953, 1962, 1983, 1990, 1999 }; + int[] set3 = { 4, 145, 146, 146, 196, 200, 202, 217, 219, 1999 }; + int[] set4 = { 4, 200, 202, 217, 219, 220, 221, 239, 242, 243, 261, 276, + 280, 281, 295, 297, 306, 309, 319, 324, 359, 375, 376, 387, 398, 401, + 406, 438, 442, 450, 450, 462, 469, 475, 495, 499, 505, 505, 513, 513, + 526, 529, 569, 584, 589, 590, 609, 614, 633, 635, 635, 644, 646, 650, + 657, 682, 685, 688, 692, 699, 704, 712, 714, 733, 736, 739, 746, 748, + 766, 768, 774, 776, 778, 786, 799, 801, 812, 814, 818, 819, 831, 832, + 836, 837, 837, 847, 864, 870, 872, 872, 875, 880, 885, 899, 905, 914, + 918, 928, 931, 932, 952, 954, 971, 981, 983, 986, 992, 998, 1000, 1031, + 1032, 1057, 1060, 1061, 1080, 1084, 1090, 1093, 1100, 1100, 1107, 1109, + 1115, 1116, 1139, 1148, 1150, 1159, 1162, 1167, 1176, 1194, 1200, 1209, + 1213, 1217, 1218, 1222, 1225, 1233, 1244, 1246, 1252, 1277, 1309, 1322, + 1325, 1327, 1327, 1329, 1341, 1341, 1342, 1352, 1359, 1360, 1361, 1363, + 1378, 1390, 1391, 1410, 1418, 1427, 1433, 1438, 1441, 1448, 1449, 1451, + 1471, 1488, 1489, 1490, 1500, 1503, 1504, 1505, 1546, 1555, 1556, 1572, + 1575, 1584, 1609, 1614, 1627, 1628, 1629, 1630, 1638, 1652, 1663, 1664, + 1665, 1674, 1686, 1688, 1689, 1692, 1702, 1703, 1707, 1708, 1708, 1716, + 1720, 1720, 1723, 1724, 1727, 1727, 1730, 1733, 1735, 1738, 1750, 1755, + 1758, 1767, 1775, 1786, 1803, 1810, 1812, 1830, 1848, 1854, 1871, 1888, + 1947, 1953, 1962, 1983, 1990, 1999 }; + int[] set5 = { 4, 1999 }; + int[] 
set6 = { 2000 }; + + OpenBitSet ps1 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + OpenBitSet ps2 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + OpenBitSet ps3 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + P4DDocIdSet ps4 = new P4DDocIdSet(128); + + // Build open bit set + for (int i = 0; i < set4.length; i++) + ps4.addDoc(set4[i]); + + OpenBitSet ps5 = new OpenBitSet(); + + // Build open bit set + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + P4DDocIdSet ps6 = new P4DDocIdSet(128); + ps6.addDoc(2000); + + ArrayList sets = new ArrayList(); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + System.out.println("Running Combination Sanity test CASE 1"); + System.out + .println("TEST CASE : Or first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + + OrDocIdSet ord = new OrDocIdSet(sets); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps5); + + AndDocIdSet and = new AndDocIdSet(sets2); + + DocIdSetIterator andit = and.iterator(); + + int index = 0; + while (andit.next()) { + if (set5[index++] != andit.doc()) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + andit.doc()); + } + + if (index != set5.length) + System.err + .println("Error: could not recover all and elements: expected length-" + + set5.length + " but was -" + index); + + System.out.println("Combination sanity complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 2"); + System.out + .println("TEST CASE : AND first 4 sets, AND with the 5th should recover set5"); + System.out.println("----------------------------"); + + AndDocIdSet and1 = new AndDocIdSet(sets); + + sets2 = new ArrayList(); + 
sets2.add(and1); + sets2.add(ps5); + + AndDocIdSet and2 = new AndDocIdSet(sets2); + + andit = and2.iterator(); + + index = 0; + while (andit.next()) { + if (set5[index++] != andit.doc()) + System.err.println("Error in combination test: expected - " + + set5[index - 1] + " but was - " + andit.doc()); + } + + if (index != set5.length) + System.err + .println("Error: could not recover all and elements: expected length-" + + set5.length + " but was -" + index); + + System.out.println("Combination sanity CASE 2 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 3"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st should recover set1"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or3 = new OrDocIdSet(sets); + + sets2 = new ArrayList(); + sets2.add(or3); + sets2.add(ps1); + + OrDocIdSet or4 = new OrDocIdSet(sets2); + + DocIdSetIterator orit = or4.iterator(); + + index = 0; + int ctr = 0; + while (orit.next()) { + index = ps1.nextSetBit(index); + if (index == -1) + System.err + .println("Error in combination test: no value expected but was - " + + orit.doc()); + else if (index != orit.doc()) + System.err.println("Error in combination test: expected - " + + set1[index - 1] + " but was - " + orit.doc()); + index++; + ctr++; + } + + if (ctr != ps1.cardinality()) + System.err + .println("Error: could not recover all and elements: expected length-" + + ctr + " but was -" + ps1.cardinality()); + + System.out.println("Combination sanity CASE 3 complete."); + System.out.println(); + + System.out.println("Running Combination Sanity test CASE 4"); + System.out + .println("TEST CASE : OR last 4 sets, OR with the 1st and ~{2000} should recover 0-1999"); + System.out.println("----------------------------"); + + sets.clear(); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + + OrDocIdSet or5 
= new OrDocIdSet(sets); + NotDocIdSet not = new NotDocIdSet(ps6, 2001); + + sets2 = new ArrayList(); + sets2.add(or3); + sets2.add(ps1); + sets2.add(not); + + OrDocIdSet or6 = new OrDocIdSet(sets2); + + orit = or6.iterator(); + + index = 0; + ctr = 0; + + while (orit.next()) { + if (index != orit.doc()) + System.err.println("Error in combination test: expected - " + index + + " but was - " + orit.doc()); + index++; + + } + + if (index != set6[0]) + System.err + .println("Error: could not recover all and elements: expected length-" + + set6[0] + " but was -" + index); + + System.out.println("Combination sanity CASE 4 complete."); + System.out.println(); + + } + + public static void testBoboFailureCaseSmall() throws IOException { + + System.out.println("Running BOBO Small Test case..."); + System.out.println("----------------------------"); + + OpenBitSet bs1 = new OpenBitSet(); + OpenBitSet bs2 = new OpenBitSet(); + OpenBitSet bs3 = new OpenBitSet(); + + bs1.set(858); + bs2.set(857); + ArrayList sets = new ArrayList(); + sets.add(bs1); + sets.add(bs2); + OrDocIdSet ord = new OrDocIdSet(sets); + bs3.set(857); + bs3.set(858); + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(bs3); + + AndDocIdSet and = new AndDocIdSet(sets2); + DocIdSetIterator andit = and.iterator(); + while (andit.next()) { + System.out.println(andit.doc()); + } + + } + + public static void testBoboFailureCase() throws IOException { + + System.out.println("Running BOBO Test case..."); + System.out.println("----------------------------"); + + int set0[] = { 9, 20, 31, 42, 65, 76, 87, 108, 119, 130, 141, 152, 163, + 186, 197, 208, 219, 230, 241, 265, 276, 287, 298, 309, 332, 343, 354, + 365, 376, 387, 410, 421, 431, 442, 453, 476, 487, 498, 509, 520, 531, + 554, 565, 575, 586, 597, 608, 619, 630, 653, 664, 675, 686, 697, 708, + 717, 728, 739, 750, 773, 784, 814, 820, 831, 842, 853 }; + int set1[] = { 8, 19, 30, 53, 64, 75, 86, 96, 107, 118, 129, 140, 151, 174, + 185, 196, 207, 218, 
229, 252, 264, 275, 286, 297, 320, 331, 342, 353, + 364, 375, 398, 409, 420, 430, 441, 464, 475, 486, 497, 508, 519, 542, + 553, 564, 574, 585, 596, 607, 618, 641, 652, 663, 674, 685, 696, 716, + 727, 738, 761, 772, 783, 802, 813, 819, 830, 841 }; + int set2[] = { 7, 41, 52, 63, 74, 85, 106, 117, 128, 139, 162, 173, 184, + 195, 206, 217, 240, 251, 263, 274, 285, 308, 319, 330, 341, 352, 363, + 386, 397, 408, 419, 429, 452, 463, 474, 485, 496, 507, 530, 541, 552, + 563, 573, 584, 595, 606, 629, 640, 651, 662, 673, 684, 707, 715, 726, + 749, 760, 771, 782, 791, 801, 812, 818, 829, 852, 858 }; + int set3[] = { 6, 29, 40, 51, 62, 73, 84, 105, 116, 127, 150, 161, 172, + 183, 194, 205, 228, 239, 250, 262, 273, 296, 307, 318, 329, 340, 351, + 374, 385, 396, 407, 418, 440, 451, 462, 473, 484, 495, 518, 529, 540, + 551, 562, 572, 583, 594, 617, 628, 639, 650, 661, 672, 695, 706, 714, + 737, 748, 759, 770, 781, 790, 793, 800, 811, 840, 851 }; + int set4[] = { 17, 28, 39, 50, 61, 72, 95, 104, 115, 138, 149, 160, 171, + 182, 193, 216, 227, 238, 249, 260, 261, 284, 295, 306, 317, 328, 339, + 362, 373, 384, 395, 406, 417, 439, 450, 461, 472, 483, 506, 517, 528, + 539, 550, 561, 582, 605, 616, 627, 638, 649, 660, 683, 694, 705, 725, + 736, 747, 758, 769, 780, 789, 799, 810, 828, 839, 850 }; + int set5[] = { 5, 16, 27, 38, 49, 60, 83, 94, 103, 126, 137, 148, 159, 170, + 181, 204, 215, 226, 237, 248, 259, 272, 283, 294, 305, 316, 327, 350, + 361, 372, 383, 394, 405, 428, 438, 449, 460, 471, 494, 505, 516, 527, + 538, 549, 593, 604, 615, 626, 637, 648, 671, 682, 693, 704, 724, 735, + 746, 757, 768, 788, 792, 798, 809, 827, 838, 849 }; + int set6[] = { 4, 15, 26, 37, 48, 71, 82, 93, 114, 125, 136, 147, 158, 169, + 192, 203, 214, 225, 236, 247, 271, 282, 293, 304, 315, 338, 349, 360, + 371, 382, 393, 416, 427, 437, 448, 459, 482, 493, 504, 515, 526, 537, + 560, 571, 581, 592, 603, 614, 625, 636, 659, 670, 681, 692, 703, 723, + 734, 745, 756, 779, 787, 796, 797, 826, 837, 848 }; + int 
set7[] = { 3, 14, 25, 36, 59, 70, 81, 92, 102, 113, 124, 135, 146, 157, + 180, 191, 202, 213, 224, 235, 258, 270, 281, 292, 303, 326, 337, 348, + 359, 370, 381, 404, 415, 426, 436, 447, 470, 481, 492, 503, 514, 525, + 548, 559, 570, 580, 591, 602, 613, 624, 647, 658, 669, 680, 691, 702, + 722, 733, 744, 767, 778, 795, 808, 825, 836, 847 }; + int set8[] = { 2, 13, 24, 47, 58, 69, 80, 91, 101, 112, 123, 134, 145, 168, + 179, 190, 201, 212, 223, 246, 257, 269, 280, 291, 314, 325, 336, 347, + 358, 369, 392, 403, 414, 425, 435, 458, 469, 480, 491, 502, 513, 536, + 547, 558, 569, 579, 590, 601, 612, 635, 646, 657, 668, 679, 690, 713, + 721, 732, 755, 766, 777, 786, 794, 807, 824, 835 }; + int set9[] = { 1, 10, 12, 21, 32, 35, 43, 46, 54, 57, 68, 77, 79, 88, 90, + 97, 100, 111, 120, 122, 131, 133, 142, 153, 156, 164, 167, 175, 178, + 189, 198, 200, 209, 211, 220, 231, 234, 242, 245, 253, 256, 266, 268, + 277, 279, 288, 299, 302, 310, 313, 321, 324, 335, 344, 346, 355, 357, + 366, 377, 380, 388, 391, 399, 402, 413, 422, 424, 432, 443, 446, 454, + 457, 465, 468, 479, 488, 490, 499, 501, 510, 521, 524, 532, 535, 543, + 546, 557, 566, 568, 578, 587, 589, 598, 600, 609, 620, 623, 631, 634, + 642, 645, 656, 665, 667, 676, 678, 687, 698, 701, 709, 712, 718, 720, + 729, 740, 743, 751, 754, 762, 765, 776, 785, 803, 806, 817, 821, 823, + 832, 843, 846, 854 }; + int set10[] = { 23, 34, 45, 56, 67, 78, 99, 110, 121, 144, 155, 166, 177, + 188, 199, 222, 233, 244, 255, 267, 290, 301, 312, 323, 334, 345, 368, + 379, 390, 401, 412, 423, 434, 445, 456, 467, 478, 489, 512, 523, 534, + 545, 556, 567, 577, 588, 611, 622, 633, 644, 655, 666, 689, 700, 711, + 731, 742, 753, 764, 775, 805, 816, 834, 845, 856, 857 }; + int set11[] = { 11, 22, 33, 44, 55, 66, 89, 98, 109, 132, 143, 154, 165, + 176, 187, 210, 221, 232, 243, 254, 278, 289, 300, 311, 322, 333, 356, + 367, 378, 389, 400, 411, 433, 444, 455, 466, 477, 500, 511, 522, 533, + 544, 555, 576, 599, 610, 621, 632, 643, 654, 677, 688, 699, 
710, 719, + 730, 741, 752, 763, 774, 804, 815, 822, 833, 844, 855 }; + + int set12[] = { 857, 858 }; + + OpenBitSet ps0 = new OpenBitSet(); + for (int i = 0; i < set0.length; i++) + ps0.set(set0[i]); + + OpenBitSet ps1 = new OpenBitSet(); + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + OpenBitSet ps2 = new OpenBitSet(); + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + OpenBitSet ps3 = new OpenBitSet(); + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + OpenBitSet ps4 = new OpenBitSet(); + for (int i = 0; i < set4.length; i++) + ps4.set(set4[i]); + + OpenBitSet ps5 = new OpenBitSet(); + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + OpenBitSet ps6 = new OpenBitSet(); + for (int i = 0; i < set6.length; i++) + ps6.set(set6[i]); + + OpenBitSet ps7 = new OpenBitSet(); + for (int i = 0; i < set7.length; i++) + ps7.set(set7[i]); + + OpenBitSet ps8 = new OpenBitSet(); + for (int i = 0; i < set8.length; i++) + ps8.set(set8[i]); + + OpenBitSet ps9 = new OpenBitSet(); + for (int i = 0; i < set9.length; i++) + ps9.set(set9[i]); + + OpenBitSet ps10 = new OpenBitSet(); + for (int i = 0; i < set10.length; i++) + ps10.set(set10[i]); + + OpenBitSet ps11 = new OpenBitSet(); + for (int i = 0; i < set11.length; i++) + ps11.set(set11[i]); + + ArrayList sets = new ArrayList(); + sets.add(ps0); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + sets.add(ps6); + sets.add(ps7); + sets.add(ps8); + sets.add(ps9); + sets.add(ps10); + sets.add(ps11); + + OrDocIdSet ord = new OrDocIdSet(sets); + DocIdSetIterator dcit = ord.iterator(); + while (dcit.next()) + System.out.print(dcit.doc() + ","); + System.out.println(""); + + OpenBitSet ps12 = new OpenBitSet(); + for (int i = 0; i < set12.length; i++) + ps12.set(set12[i]); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps12); + + AndDocIdSet andSet = new AndDocIdSet(sets2); + DocIdSetIterator andit = andSet.iterator(); + + while 
(andit.next()) + System.out.print(andit.doc() + ","); + System.out.println(""); + + } + + public static void testBoboFailureCase2() throws IOException { + + System.out.println("Running BOBO Test case 2..."); + System.out.println("----------------------------"); + + int set0[] = { 9, 20, 31, 42, 65, 76, 87, 108, 119, 130, 141, 152, 163, + 186, 197, 208, 219, 230, 241, 265, 276, 287, 298, 309, 332, 343, 354, + 365, 376, 387, 410, 421, 431, 442, 453, 476, 487, 498, 509, 520, 531, + 554, 565, 575, 586, 597, 608, 619, 630, 653, 664, 675, 686, 697, 708, + 717, 728, 739, 750, 773, 784, 814, 820, 831, 842, 853 }; + int set1[] = { 8, 19, 30, 53, 64, 75, 86, 96, 107, 118, 129, 140, 151, 174, + 185, 196, 207, 218, 229, 252, 264, 275, 286, 297, 320, 331, 342, 353, + 364, 375, 398, 409, 420, 430, 441, 464, 475, 486, 497, 508, 519, 542, + 553, 564, 574, 585, 596, 607, 618, 641, 652, 663, 674, 685, 696, 716, + 727, 738, 761, 772, 783, 802, 813, 819, 830, 841 }; + int set2[] = { 7, 18, 41, 52, 63, 74, 85, 106, 117, 128, 139, 162, 173, + 184, 195, 206, 217, 240, 251, 263, 274, 285, 308, 319, 330, 341, 352, + 363, 386, 397, 408, 419, 429, 452, 463, 474, 485, 496, 507, 530, 541, + 552, 563, 573, 584, 595, 606, 629, 640, 651, 662, 673, 684, 707, 715, + 726, 749, 760, 771, 782, 791, 801, 812, 818, 829, 852 }; + int set3[] = { 6, 29, 40, 51, 62, 73, 84, 105, 116, 127, 150, 161, 172, + 183, 194, 205, 228, 239, 250, 262, 273, 296, 307, 318, 329, 340, 351, + 374, 385, 396, 407, 418, 440, 451, 462, 473, 484, 495, 518, 529, 540, + 551, 562, 572, 583, 594, 617, 628, 639, 650, 661, 672, 695, 706, 714, + 737, 748, 759, 770, 781, 790, 793, 800, 811, 840, 851 }; + int set4[] = { 17, 28, 39, 50, 61, 72, 95, 104, 115, 138, 149, 160, 171, + 182, 193, 216, 227, 238, 249, 260, 261, 284, 295, 306, 317, 328, 339, + 362, 373, 384, 395, 406, 417, 439, 450, 461, 472, 483, 506, 517, 528, + 539, 550, 561, 582, 605, 616, 627, 638, 649, 660, 683, 694, 705, 725, + 736, 747, 758, 769, 780, 789, 799, 810, 828, 
839, 850 }; + int set5[] = { 5, 16, 27, 38, 49, 60, 83, 94, 103, 126, 137, 148, 159, 170, + 181, 204, 215, 226, 237, 248, 259, 272, 283, 294, 305, 316, 327, 350, + 361, 372, 383, 394, 405, 428, 438, 449, 460, 471, 494, 505, 516, 527, + 538, 549, 593, 604, 615, 626, 637, 648, 671, 682, 693, 704, 724, 735, + 746, 757, 768, 788, 792, 798, 809, 827, 838, 849 }; + int set6[] = { 4, 15, 26, 37, 48, 71, 82, 93, 114, 125, 136, 147, 158, 169, + 192, 203, 214, 225, 236, 247, 271, 282, 293, 304, 315, 338, 349, 360, + 371, 382, 393, 416, 427, 437, 448, 459, 482, 493, 504, 515, 526, 537, + 560, 571, 581, 592, 603, 614, 625, 636, 659, 670, 681, 692, 703, 723, + 734, 745, 756, 779, 787, 796, 797, 826, 837, 848 }; + int set7[] = { 3, 14, 25, 36, 59, 70, 81, 92, 102, 113, 124, 135, 146, 157, + 180, 191, 202, 213, 224, 235, 258, 270, 281, 292, 303, 326, 337, 348, + 359, 370, 381, 404, 415, 426, 436, 447, 470, 481, 492, 503, 514, 525, + 548, 559, 570, 580, 591, 602, 613, 624, 647, 658, 669, 680, 691, 702, + 722, 733, 744, 767, 778, 795, 808, 825, 836, 847 }; + int set8[] = { 2, 13, 24, 47, 58, 69, 80, 91, 101, 112, 123, 134, 145, 168, + 179, 190, 201, 212, 223, 246, 257, 269, 280, 291, 314, 325, 336, 347, + 358, 369, 392, 403, 414, 425, 435, 458, 469, 480, 491, 502, 513, 536, + 547, 558, 569, 579, 590, 601, 612, 635, 646, 657, 668, 679, 690, 713, + 721, 732, 755, 766, 777, 786, 794, 807, 824, 835 }; + int set9[] = { 1, 10, 12, 21, 32, 35, 43, 46, 54, 57, 68, 77, 79, 88, 90, + 97, 100, 111, 120, 122, 131, 133, 142, 153, 156, 164, 167, 175, 178, + 189, 198, 200, 209, 211, 220, 231, 234, 242, 245, 253, 256, 266, 268, + 277, 279, 288, 299, 302, 310, 313, 321, 324, 335, 344, 346, 355, 357, + 366, 377, 380, 388, 391, 399, 402, 413, 422, 424, 432, 443, 446, 454, + 457, 465, 468, 479, 488, 490, 499, 501, 510, 521, 524, 532, 535, 543, + 546, 557, 566, 568, 578, 587, 589, 598, 600, 609, 620, 623, 631, 634, + 642, 645, 656, 665, 667, 676, 678, 687, 698, 701, 709, 712, 718, 720, + 729, 740, 743, 
751, 754, 762, 765, 776, 785, 803, 806, 817, 821, 823, + 832, 843, 846, 854 }; + int set10[] = { 23, 34, 45, 56, 67, 78, 99, 110, 121, 144, 155, 166, 177, + 188, 199, 222, 233, 244, 255, 267, 290, 301, 312, 323, 334, 345, 368, + 379, 390, 401, 412, 423, 434, 445, 456, 467, 478, 489, 512, 523, 534, + 545, 556, 567, 577, 588, 611, 622, 633, 644, 655, 666, 689, 700, 711, + 731, 742, 753, 764, 775, 805, 816, 834, 845, 856, 857, 858 }; + int set11[] = { 11, 22, 33, 44, 55, 66, 89, 98, 109, 132, 143, 154, 165, + 176, 187, 210, 221, 232, 243, 254, 278, 289, 300, 311, 322, 333, 356, + 367, 378, 389, 400, 411, 433, 444, 455, 466, 477, 500, 511, 522, 533, + 544, 555, 576, 599, 610, 621, 632, 643, 654, 677, 688, 699, 710, 719, + 730, 741, 752, 763, 774, 804, 815, 822, 833, 844, 855 }; + int set12[] = { 857, 858 }; + + OpenBitSet ps0 = new OpenBitSet(); + for (int i = 0; i < set0.length; i++) + ps0.set(set0[i]); + + OpenBitSet ps1 = new OpenBitSet(); + for (int i = 0; i < set1.length; i++) + ps1.set(set1[i]); + + OpenBitSet ps2 = new OpenBitSet(); + for (int i = 0; i < set2.length; i++) + ps2.set(set2[i]); + + OpenBitSet ps3 = new OpenBitSet(); + for (int i = 0; i < set3.length; i++) + ps3.set(set3[i]); + + OpenBitSet ps4 = new OpenBitSet(); + for (int i = 0; i < set4.length; i++) + ps4.set(set4[i]); + + OpenBitSet ps5 = new OpenBitSet(); + for (int i = 0; i < set5.length; i++) + ps5.set(set5[i]); + + OpenBitSet ps6 = new OpenBitSet(); + for (int i = 0; i < set6.length; i++) + ps6.set(set6[i]); + + OpenBitSet ps7 = new OpenBitSet(); + for (int i = 0; i < set7.length; i++) + ps7.set(set7[i]); + + OpenBitSet ps8 = new OpenBitSet(); + for (int i = 0; i < set8.length; i++) + ps8.set(set8[i]); + + P4DDocIdSet ps9 = new P4DDocIdSet(128); + for (int i = 0; i < set9.length; i++) + ps9.addDoc(set9[i]); + + OpenBitSet ps10 = new OpenBitSet(); + for (int i = 0; i < set10.length; i++) + ps10.set(set10[i]); + + OpenBitSet ps11 = new OpenBitSet(); + for (int i = 0; i < set11.length; i++) + 
ps11.set(set11[i]); + + ArrayList sets = new ArrayList(); + sets.add(ps0); + sets.add(ps1); + sets.add(ps2); + sets.add(ps3); + sets.add(ps4); + sets.add(ps5); + sets.add(ps6); + sets.add(ps7); + sets.add(ps8); + sets.add(ps9); + sets.add(ps10); + sets.add(ps11); + + OrDocIdSet ord = new OrDocIdSet(sets); + DocIdSetIterator dcit = ord.iterator(); + while (dcit.next()) + System.out.print(dcit.doc() + ","); + System.out.println(""); + + OpenBitSet ps12 = new OpenBitSet(); + for (int i = 0; i < set12.length; i++) + ps12.set(set12[i]); + + ArrayList sets2 = new ArrayList(); + sets2.add(ord); + sets2.add(ps12); + + AndDocIdSet andSet = new AndDocIdSet(sets2); + DocIdSetIterator andit = andSet.iterator(); + + while (andit.next()) + System.out.print(andit.doc() + ","); + System.out.println(""); + + } + + + /** + * Test the representation logic for getNetworkInRange + * + * @throws Exception + */ + public static void testOptimizeRepresentation() throws Exception + { + + Random random = new Random(); + int length[] = {500,5000,10000}; + int batch = 128; + int randomizer = 0; + + System.out.println("Running Test Optimize Representation ..."); + System.out.println("----------------------------"); + + + for(int x=0; x < length.length; x++) + { + randomizer = 0; + int[] network = new int[length[x]*batch]; + + for(int i = 0;i list = new ArrayList(); + + for(int k=0;k 10) + { + OBSDocIdSet bitSet = new OBSDocIdSet(size); + for(int i=0;i or1 = new ArrayList(); + or1.add(a); + or1.add(d); + or1.add(g); + + ArrayList or2 = new ArrayList(); + or2.add(b); + or2.add(e); + or2.add(h); + + ArrayList or3 = new ArrayList(); + or3.add(c); + or3.add(f); + or3.add(i); + + ArrayList or4 = new ArrayList(); + or4.add(d); + or4.add(g); + or4.add(i); + + ArrayList or5 = new ArrayList(); + or5.add(e); + or5.add(h); + or5.add(k); + + ArrayList and6 = new ArrayList(); + and6.add(new OrDocIdSet(or1)); + and6.add(new OrDocIdSet(or2)); + and6.add(new OrDocIdSet(or3)); + and6.add(new OrDocIdSet(or4)); + 
and6.add(new OrDocIdSet(or5)); + + AndDocIdSet and = new AndDocIdSet(and6); + DocIdSetIterator dcit = and.iterator(); + + long nowMillis = System.currentTimeMillis(); + int cnt = 0; + while(dcit.next()){ + cnt ++ ; + dcit.doc(); + } + + nowMillis = System.currentTimeMillis() - nowMillis; + System.out.println("Count hit: "+ cnt); + System.out.println("Time to execute: "+ nowMillis + " ns.."); + + + + + + } + + + public static void main(String args[]) throws IOException { + + for (int i = 0; i < 50; i++) { + + testP4DDocIdSetIteratePerf(128,10000); + + //testOBSDocIdSet(128, 50, 12000000); + //testSpellCheckerUsageSet(100000, 14000000); + /* + * testP4DDocIdSetIteratePerf(128,50); testP4DDocIdSetSkipPerf(128,50); + * testP4DDocIdSetCompressionSanity(128,5); + * testP4DDocIdSetNonBoundaryCompressionSanity(128, 5, 50); + * testP4DDocIdSetSkipSanity(128, 5); + * testP4DDocIdSetNonBoundarySkipSanity(128, 1, 32); + * + * testIntArrayDocIdSetIterateSanity(20000); + * testIntArrayDocIdSetSkipSanity(200); + * + * testCombinationSetOperation(128,10,3); + * + * testOrDocIdSetSkip(128,50,3); testAndDocIdSetSkip(128,3,3); + * testAndDocIdSet(128,54,3); Å testNotDocIdSetSkipSanity(128,50,1000000); + * + * testIntArrayDocIdSetIterateSanity(20000); + * testIntArrayDocIdSetSkipSanity(200); + * ; + + testOrDocIdSet(128, 1, 3); + testSmallSets(); + testNotDocIdSet(128, 2, 1000000); + testAndDocIdSet(128, 1, 2); + testOrDocIdSetSanity(); + testCombinationSanity(); + testBoboFailureCase(); + testBoboFailureCaseSmall(); + testBoboFailureCase2(); + + // testOrDocIdSetSkipSanity(128); + // testNotDocIdSetSkipSanity(128); + testAndDocIdSetSkipSanity(128); + testOptimizeRepresentation(); + */ + + } + + } + + + +} Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestRealisticOrNetwork.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestRealisticOrNetwork.java (revision 0) +++ 
contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestRealisticOrNetwork.java (revision 0)
@@ -0,0 +1,125 @@
+package org.apache.lucene.kamikaze.test.perf;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+
+import org.apache.lucene.kamikaze.docidset.api.DocSet;
+import org.apache.lucene.kamikaze.docidset.impl.IntArrayDocIdSet;
+import org.apache.lucene.kamikaze.docidset.impl.OrDocIdSet;
+import org.apache.lucene.kamikaze.docidset.impl.P4DDocIdSet;
+import org.apache.lucene.kamikaze.docidset.utils.DocSetFactory;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * Manual performance driver that ORs four doc id sets together and walks the
+ * resulting iterator to exhaustion.
+ *
+ * Degree 0 is a single hard-coded doc id; degrees 1-3 are read from text files
+ * holding one integer doc id per line. The files are looked up in the directory
+ * named by the kamikaze.degree.dir system property, which defaults to the
+ * location used by the original author.
+ */
+public class TestRealisticOrNetwork {
+
+  /** Directory containing degree1s.txt, degree2s.txt and degree3s.txt. */
+  private static final String DATA_DIR =
+      System.getProperty("kamikaze.degree.dir", "/Users/abhasin");
+
+  /**
+   * Builds the union of the four degree sets and iterates over every doc id.
+   *
+   * @param args ignored
+   * @throws IOException if a degree file cannot be read
+   */
+  public static void main(String args[]) throws IOException {
+    ArrayList<DocIdSet> arr = new ArrayList<DocIdSet>();
+    for (int i = 0; i < 4; i++) {
+      arr.add(loadDegree(i));
+    }
+
+    OrDocIdSet ord = new OrDocIdSet(arr);
+    DocIdSetIterator orit = ord.iterator();
+    while (orit.next()) {
+      orit.doc();
+    }
+  }
+
+  /**
+   * Creates the doc id set for one degree.
+   *
+   * @param degree which set to build, 0 to 3
+   * @return the populated set
+   * @throws IOException if the backing file cannot be read
+   * @throws IllegalArgumentException if degree is not 0, 1, 2 or 3
+   */
+  private static DocSet loadDegree(int degree) throws IOException {
+    switch (degree) {
+      case 0: {
+        DocSet docSet = DocSetFactory.getDocSetInstance(2448149, 2448149, 1,
+            DocSetFactory.FOCUS.OPTIMAL);
+        docSet.addDoc(2448149);
+        return docSet;
+      }
+      case 1:
+        return fill(new IntArrayDocIdSet(), readDocIds("degree1s.txt"));
+      case 2:
+        return fill(new IntArrayDocIdSet(), readDocIds("degree2s.txt"));
+      case 3: {
+        // Only degree 3 is sorted before insertion; degrees 1 and 2 keep file order.
+        ArrayList<Integer> data = readDocIds("degree3s.txt");
+        Collections.sort(data);
+        DocSet d3 = new P4DDocIdSet();
+        for (Integer d : data) {
+          System.out.println(d);
+          d3.addDoc(d);
+        }
+        return d3;
+      }
+      default:
+        throw new IllegalArgumentException("Unknown degree: " + degree);
+    }
+  }
+
+  /** Adds every id, in list order, to the given set and returns that same set. */
+  private static DocSet fill(DocSet target, ArrayList<Integer> ids) throws IOException {
+    for (Integer id : ids) {
+      target.addDoc(id);
+    }
+    return target;
+  }
+
+  /**
+   * Reads one integer per line from a file in DATA_DIR.
+   *
+   * Reading stops at end of file or at the first blank line. Blank lines are
+   * detected by trimmed length rather than by reference comparison with "", so
+   * they end the read instead of reaching Integer.parseInt.
+   *
+   * @param fileName name of the file inside DATA_DIR
+   * @return the ids in file order
+   * @throws IOException if the file cannot be opened or read
+   */
+  private static ArrayList<Integer> readDocIds(String fileName) throws IOException {
+    ArrayList<Integer> ids = new ArrayList<Integer>();
+    BufferedReader reader = new BufferedReader(new FileReader(new File(DATA_DIR, fileName)));
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        String trimmed = line.trim();
+        if (trimmed.length() == 0) {
+          break;
+        }
+        ids.add(Integer.parseInt(trimmed));
+      }
+    } finally {
+      // Close on every path, including a parse failure part-way through the file.
+      reader.close();
+    }
+    return ids;
+  }
+}
Index: contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestSizeEstimates.java =================================================================== --- contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestSizeEstimates.java (revision 0) +++ contrib/kamikaze/test/org/apache/lucene/kamikaze/test/perf/TestSizeEstimates.java (revision 0) @@ -0,0 +1,269 @@ +package org.apache.lucene.kamikaze.test.perf; + +import java.lang.reflect.Array; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.BitSet; + +import org.apache.lucene.kamikaze.docidset.bitset.MyOpenBitSet; +import org.apache.lucene.kamikaze.docidset.utils.IntArray; +import org.apache.lucene.kamikaze.docidset.utils.LongSegmentArray; +import org.apache.lucene.kamikaze.docidset.utils.MyOpenBitSetArray; + + +public class TestSizeEstimates +{ + + public static void estimateIntArraySize(int size) + { + try { + Class clazz = IntArray.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,size)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + + public static void estimateBitSetSize(int size) + { + try { + Class clazz = BitSet.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,size)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + public static void estimateMyOpenBitSetSize(int size) + { + try { + Class clazz = MyOpenBitSet.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,size)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + + + private static void 
estimateArrayListSize(int size) { + try { + Class clazz = ArrayList.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,size)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + + + + public static long sizeOf(Class clazz, int userData) { + long size= 0; + Object[] objects = new Object[100]; + try { + + Constructor c; + try{ + c = clazz.getConstructor(long.class); + } + catch(Exception e) + { + c = null; + } + + if(c == null) + c= clazz.getConstructor(int.class); + + Object primer = c.newInstance(userData); + long startingMemoryUse = getUsedMemory(); + for (int i = 0; i < objects.length; i++) { + objects[i] = c.newInstance(userData); + fill(objects[i], userData); + optimize(objects[i]); + } + long endingMemoryUse = getUsedMemory(); + float approxSize = (endingMemoryUse - + startingMemoryUse)/100f ; + size = Math.round(approxSize); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("WARNING:couldn't instantiate" + +clazz); + e.printStackTrace(); + } + return size; + } + + private static void estimateNativeIntArraySize(int userData) { + + int array[] = (int[])Array.newInstance(int.class, userData); + + long size= 0; + Object[] objects = new Object[100]; + try { + + long startingMemoryUse = getUsedMemory(); + for (int i = 0; i < objects.length; i++) { + objects[i] = (int[])Array.newInstance(int.class, userData); + fill(objects[i], userData); + optimize(objects[i]); + } + long endingMemoryUse = getUsedMemory(); + float approxSize = (endingMemoryUse - + startingMemoryUse) /100f; + size = Math.round(approxSize); + } catch (Exception e) { + e.printStackTrace(); + System.out.println("WARNING:couldn't instantiate Native Int Array"); + + } + System.out.println(int[].class.getName()+":"+size); + } + + + private static void estimateMyOpenBitSetArraySize(int userData) { + + try { + Class clazz = MyOpenBitSetArray.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,userData)); + } catch (Exception e) { + e.printStackTrace(); + } + 
} + + + private static void estimateLongSegmentArraySize(int userData) { + // TODO Auto-generated method stub + try { + Class clazz = LongSegmentArray.class; + System.out.println(clazz.getName()+":"+sizeOf(clazz,userData)); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static void fill(Object object, int userData) { + + if(object instanceof MyOpenBitSet) + ((MyOpenBitSet)object).set(userData-1); + else if(object instanceof ArrayList) + { + for(int i = 0 ;i < userData; i++ ) + { + ((ArrayList)object).add(new Integer(10)); + } + } + else if(object instanceof IntArray) + { + for(int i = 0 ;i < userData; i++ ) + { + ((IntArray)object).set(i, 10); + } + } + else if(object instanceof int[] ) + { + for(int i = 0 ;i < userData; i++ ) + { + ( (int[])object)[i] = 10; + } + } + else if(object instanceof long[] ) + { + for(int i = 0 ;i < userData; i++ ) + { + ( (long[])object)[i] = 10; + } + } + else if(object instanceof MyOpenBitSetArray) + { + for(int i = 0 ;i < userData; i++ ) + { + + ((MyOpenBitSetArray) object).add(new MyOpenBitSet(1200)); + fill(((MyOpenBitSetArray) object).get(i) ,1200); + } + } + else if(object instanceof LongSegmentArray) + { + for(int i = 0 ;i < userData; i++ ) + { + ((LongSegmentArray) object).add(new long[2000>>>6]); + fill(((LongSegmentArray) object) .get(i),2000>>>6); + } + } + + } + +private static void optimize(Object object) { + + if(object instanceof MyOpenBitSet) + ((MyOpenBitSet)object).trimTrailingZeros(); + else if(object instanceof ArrayList) + ((ArrayList)object).trimToSize(); + else if(object instanceof IntArray) + ((IntArray)object).seal(); + + + + } + + +private static long getUsedMemory() { + gc(); + long totalMemory = Runtime.getRuntime().totalMemory(); + gc(); + long freeMemory = Runtime.getRuntime().freeMemory(); + long usedMemory = totalMemory - freeMemory; + return usedMemory; + } + private static void gc() { + try { + System.gc(); + Thread.currentThread().sleep(100); + System.runFinalization(); + 
Thread.currentThread().sleep(100); + System.gc(); + Thread.currentThread().sleep(100); + System.runFinalization(); + Thread.currentThread().sleep(100); + + } catch (Exception e) { + e.printStackTrace(); + } + } + public static void main(String[] args) { + //estimateMyOpenBitSetSize(1200); + //estimateBitSetSize(1200); + estimateIntArraySize(1); + estimateIntArraySize(10); + estimateIntArraySize(100); + estimateIntArraySize(1000); + estimateIntArraySize(10000); + + + //estimateArrayListSize(1024); + //estimateMyOpenBitSetArraySize(32000); + estimateLongSegmentArraySize(1); + estimateLongSegmentArraySize(10); + estimateLongSegmentArraySize(100); + estimateLongSegmentArraySize(1000); + estimateLongSegmentArraySize(10000); + + estimateNativeIntArraySize(1); + estimateNativeIntArraySize(10); + estimateNativeIntArraySize(100); + estimateNativeIntArraySize(1000); + estimateNativeIntArraySize(10000); + + + + //estimateLongSegmentArraySize(32000); + System.exit(0); + } + + + + +}