Index: src/java/org/apache/lucene/search/TestIteratorPerf.java =================================================================== --- src/java/org/apache/lucene/search/TestIteratorPerf.java (revision 0) +++ src/java/org/apache/lucene/search/TestIteratorPerf.java (revision 0) @@ -0,0 +1,309 @@ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.lucene.search.docsets.AndDocIdSetIterator; +import org.apache.lucene.search.docsets.OrDocIdSetIterator; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.OpenBitSetIterator; +import org.apache.lucene.util.OpenBitSetIteratorExperiment; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * + * @version $Id$ + */ +public class TestIteratorPerf { + //Random r = new Random(0); + static final int ITER = 10; + static final int ITERATOR_SIZE = 10000000; + static final double DENSITY = 1.0;//Gaussian... 1.0 != 100% + + private static final Random random = new Random(7); + + public int doOld(int iter, OpenBitSet bs) { + int doc=-1; + for (int i=0; i bsList = new ArrayList(numBS); + for (int i=0;i>>32; } + if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; } + if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; } + indexArray = bitlist[(int)word & 0xff]; + } + + /***** alternate shift implementations + // 32 bit shifts, but a long shift needed at the end + private void shift2() { + int y = (int)word; + if (y==0) {wordShift +=32; y = (int)(word >>>32); } + if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; } + if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; } + indexArray = bitlist[y & 0xff]; + word >>>= (wordShift +1); + } + + private void shift3() { + int lower = (int)word; + int lowByte = lower & 0xff; + if (lowByte != 0) { + indexArray=bitlist[lowByte]; + return; + } + shift(); + } + ******/ + + public int next(){ + if (indexArray==0) { + if (word!=0) { + word >>>= 8; + wordShift += 8; + } + + while (word==0) { + if (++i >= words) { + return -1; + } + word = arr[i]; + wordShift =-1; // loop invariant code motion should move this + } + + // after the first time, should I go with a linear search, or + // stick with the binary search in shift? + shift(); + } + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray >>>= 4; + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + return (i<<6) + bitIndex; + } + + public int skipTo(int target) { + indexArray=0; + i = target >> 6; + if (i>=words) { + word =0; // setup so next() will also return -1 + return -1; + } + wordShift = target & 0x3f; + word = arr[i] >>> wordShift; + if (word !=0) { + wordShift--; // compensate for 1 based arrIndex + } else { + while (word ==0) { + if (++i >= words) { + return -1; + } + word = arr[i]; + } + wordShift =-1; + } + + shift(); + + int bitIndex = (indexArray & 0x0f) + wordShift; + indexArray >>>= 4; + // should i<<6 be cached as a separate variable? + // it would only save one cycle in the best circumstances. + return (i<<6) + bitIndex; + } + +} Index: src/test/org/apache/lucene/search/docsets/AndDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/docsets/AndDocIdSet.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/AndDocIdSet.java (revision 0) @@ -0,0 +1,28 @@ +package org.apache.lucene.search.docsets; + +import java.io.Serializable; +import java.util.List; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +public class AndDocIdSet extends DocIdSet implements Serializable { + private static final long serialVersionUID = 1L; + + private List _sets = null; + + public AndDocIdSet(List docSets) { + this._sets = docSets; + int size = 0; + if (_sets != null) { + for(DocIdSet set : _sets) { + if(set != null) size++; + } + } + } + + public final DocIdSetIterator iterator() { + //return new AndDocIdSetIterator(_sets); + return new AndDocIdSetIterator(_sets); + } +} Index: src/test/org/apache/lucene/search/docsets/AndDocIdSetIterator.java =================================================================== --- src/test/org/apache/lucene/search/docsets/AndDocIdSetIterator.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/AndDocIdSetIterator.java (revision 0) @@ -0,0 +1,105 @@ +package org.apache.lucene.search.docsets; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +public final class AndDocIdSetIterator extends DocIdSetIterator { + private DocIdSetIterator[] iterators = null; + private boolean firstTime=true; + private boolean more; + private int lastDoc=-1; + private int lastindex; + + public AndDocIdSetIterator(List sets) { + iterators = new DocIdSetIterator[sets.size()]; + int j = 0; + for (DocIdSet set : sets) { + if (set != null) { + DocIdSetIterator dcit = set.iterator(); + iterators[j++] = dcit; + } + } + lastindex=iterators.length - 1; + } + + private final boolean doNext() throws IOException { + int first=0; + DocIdSetIterator lastIter = iterators[lastindex]; + DocIdSetIterator firstIter; + while (more && (firstIter=iterators[first]).doc() < (lastDoc=lastIter.doc())) { + more = firstIter.skipTo(lastDoc); + lastIter = firstIter; + first = (first == (lastindex)) ? 0 : first+1; + } + return more; + } + + @Override + public final int doc() { + return lastDoc; + } + + @Override + public final boolean next() throws IOException { + if (firstTime) + return init(0); + else if (more) + more = iterators[lastindex].next(); + return doNext(); + } + + // Note... most of this could be done in the constructor + // thus skipping a check for firstTime per call to next() and skipTo() + private final boolean init(int target) throws IOException { + firstTime=false; + more = iterators.length>1; + for (int i=0; i() { // sort the array + public int compare(DocIdSetIterator o1, DocIdSetIterator o2) { + return o1.doc() - o2.doc(); + } + }); + + doNext(); + + // If first-time skip distance is any predictor of + // scorer sparseness, then we should always try to skip first on + // those scorers. + // Keep last scorer in it's last place (it will be the first + // to be skipped on), but reverse all of the others so that + // they will be skipped on in order of original high skip. + int end=lastindex; + for (int i=0; i<(end>>1); i++) { + DocIdSetIterator tmp = iterators[i]; + iterators[i] = iterators[end-i-1]; + iterators[end-i-1] = tmp; + } + + return more; + } + + @Override + public boolean skipTo(int target) throws IOException { + if (firstTime) + return init(target); + else if (more) + more = iterators[lastindex].skipTo(target); + return doNext(); + } + +} Index: src/test/org/apache/lucene/search/docsets/NotDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/docsets/NotDocIdSet.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/NotDocIdSet.java (revision 0) @@ -0,0 +1,27 @@ +package org.apache.lucene.search.docsets; + +import java.io.Serializable; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + + +public class NotDocIdSet extends DocIdSet implements Serializable { + + private static final long serialVersionUID = 1L; + + private DocIdSet innerSet = null; + + private int max = -1; + + public NotDocIdSet(DocIdSet docSet, int maxVal) { + innerSet = docSet; + max = maxVal; + + } + + @Override + public DocIdSetIterator iterator() { + return new NotDocIdSetIterator(innerSet,max); + } +} Index: src/test/org/apache/lucene/search/docsets/NotDocIdSetIterator.java =================================================================== --- src/test/org/apache/lucene/search/docsets/NotDocIdSetIterator.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/NotDocIdSetIterator.java (revision 0) @@ -0,0 +1,63 @@ +package org.apache.lucene.search.docsets; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +public class NotDocIdSetIterator extends DocIdSetIterator { + int lastReturn = -1; + private DocIdSetIterator it1 = null; + private final DocIdSet innerSet; + private final int max; + public NotDocIdSetIterator(DocIdSet set,int max) { + innerSet = set; + this.max = max; + initialize(); + } + + private void initialize() { + it1 = innerSet.iterator(); + + try { + if (!it1.next()) it1 = null; + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public int doc() { + return lastReturn; + } + + @Override + public boolean next() throws IOException { + return skipTo(lastReturn + 1); + } + + @Override + public boolean skipTo(int target) throws IOException { + + if (target <= lastReturn) target = lastReturn + 1; + + lastReturn = target; + + if (lastReturn >= max) return false; + + if (it1 != null && it1.doc() < lastReturn) { + if (!it1.skipTo(lastReturn)) { + it1 = null; + } + } + + while (it1 != null && it1.doc() == lastReturn) { + lastReturn++; + if (lastReturn >= max) return false; + if (!it1.skipTo(lastReturn)) { + it1 = null; + } + } + return true; + } + } \ No newline at end of file Index: src/test/org/apache/lucene/search/docsets/OrDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/docsets/OrDocIdSet.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/OrDocIdSet.java (revision 0) @@ -0,0 +1,29 @@ +package org.apache.lucene.search.docsets; + +import java.io.Serializable; +import java.util.List; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + + +public class OrDocIdSet extends DocIdSet implements Serializable { + private static final long serialVersionUID = 1L; + + private List sets = null; + + public OrDocIdSet(List docSets) { + this.sets = docSets; + int size = 0; + if (sets != null) { + for(DocIdSet set : sets) { + if(set != null) size++; + } + } + } + + @Override + public DocIdSetIterator iterator() { + return new OrDocIdSetIterator(sets); + } +} Index: src/test/org/apache/lucene/search/docsets/OrDocIdSetIterator.java =================================================================== --- src/test/org/apache/lucene/search/docsets/OrDocIdSetIterator.java (revision 0) +++ src/test/org/apache/lucene/search/docsets/OrDocIdSetIterator.java (revision 0) @@ -0,0 +1,156 @@ +package org.apache.lucene.search.docsets; + +import java.io.IOException; +import java.util.List; + +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +public class OrDocIdSetIterator extends DocIdSetIterator { + + private final class Item + { + public final DocIdSetIterator iter; + public int doc; + public Item(DocIdSetIterator iter) + { + this.iter = iter; + this.doc = -1; + } + } + private int _curDoc; + private final Item[] _heap; + private int _size; + + public OrDocIdSetIterator(List sets) + { + _curDoc = -1; + _heap = new Item[sets.size()]; + _size = 0; + for(DocIdSet set : sets) + { + _heap[_size++] = new Item(set.iterator()); + } + } + + @Override + public final int doc() { + return _curDoc; + } + + @Override + public final boolean next() throws IOException + { + if(_size <= 0) return false; + + Item top = _heap[0]; + while(true) + { + DocIdSetIterator topIter = top.iter; + if(topIter.next()) + { + top.doc = topIter.doc(); + heapAdjust(); + } + else + { + heapRemoveRoot(); + if(_size == 0) return false; + } + top = _heap[0]; + int topDoc = top.doc; + if(topDoc > _curDoc) + { + _curDoc = topDoc; + return true; + } + } + } + + @Override + public final boolean skipTo(int target) throws IOException + { + if(_size <= 0) return false; + + if(target <= _curDoc) target = _curDoc + 1; + + Item top = _heap[0]; + while(true) + { + DocIdSetIterator topIter = top.iter; + if(topIter.skipTo(target)) + { + top.doc = topIter.doc(); + heapAdjust(); + } + else + { + heapRemoveRoot(); + if (_size == 0) return false; + } + top = _heap[0]; + int topDoc = top.doc; + if(topDoc >= target) + { + _curDoc = topDoc; + return true; + } + } + } + + /* The subtree of subScorers at root is a min heap except possibly for its root element. + * Bubble the root down as required to make the subtree a heap. + */ + private final void heapAdjust() + { + final Item[] heap = _heap; + final Item top = heap[0]; + final int doc = top.doc; + final int size = _size; + int i = 0; + + while(true) + { + int lchild = (i<<1)+1; + if(lchild >= size) break; + + Item left = heap[lchild]; + int ldoc = left.doc; + + int rchild = lchild+1; + if(rchild < size){ + Item right = heap[rchild]; + int rdoc = right.doc; + + if(rdoc <= ldoc) + { + if(doc <= rdoc) break; + + heap[i] = right; + i = rchild; + continue; + } + } + + if(doc <= ldoc) break; + + heap[i] = left; + i = lchild; + } + heap[i] = top; + } + + // Remove the root Scorer from subScorers and re-establish it as a heap + private final void heapRemoveRoot() + { + _size--; + if (_size > 0) + { + Item tmp = _heap[0]; + _heap[0] = _heap[_size]; + _heap[_size] = tmp; // keep the finished iterator at the end for debugging + heapAdjust(); + } + } + +}