Index: src/java/org/apache/lucene/search/DisjunctionDISI.java
===================================================================
--- src/java/org/apache/lucene/search/DisjunctionDISI.java (revision 0)
+++ src/java/org/apache/lucene/search/DisjunctionDISI.java (revision 0)
@@ -0,0 +1,196 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.List;
+import java.util.Iterator;
+import java.io.IOException;
+
+import org.apache.lucene.util.DISIQueue;
+
+/** Disjunction for DISIs
+ */
+class DisjunctionDISI extends DocIdSetIterator {
+ /** The number of subscorers. */
+ private final int nrScorers;
+
+ /** The subscorers. */
+ protected final List subScorers;
+
+ /** The minimum number of scorers that should match. */
+ private final int minimumNrMatchers;
+
+ /** The scorerDocQueue contains all subscorers ordered by their current doc(),
+ * with the minimum at the top.
+ *
The scorerDocQueue is initialized the first time next() or skipTo() is called.
+ *
An exhausted scorer is immediately removed from the scorerDocQueue.
+ *
If less than the minimumNrMatchers scorers
+ * remain in the scorerDocQueue next() and skipTo() return false.
+ *
+ * After each to call to next() or skipTo()
+ * currentSumScore is the total score of the current matching doc,
+ * nrMatchers is the number of matching scorers,
+ * and all scorers are after the matching doc, or are exhausted.
+ */
+ private DISIQueue disiQueue = null;
+
+ /** The document number of the current match. */
+ private int currentDoc = -1;
+
+ /** The number of Iterators that provide the current match. */
+ protected int nrMatchers = -1;
+
+ /** Construct a DisjunctionScorer.
+ * @param subScorers A collection of at least two subscorers.
+ * @param minimumNrMatchers The positive minimum number of subscorers that should
+ * match to match this query.
+ *
When minimumNrMatchers is bigger than
+ * the number of subScorers,
+ * no matches will be produced.
+ *
When minimumNrMatchers equals the number of subScorers,
+ * it more efficient to use ConjunctionScorer.
+ * @throws IOException
+ */
+ public DisjunctionDISI( List subScorers, int minimumNrMatchers) throws IOException {
+ nrScorers = subScorers.size();
+
+ if (minimumNrMatchers <= 0) {
+ throw new IllegalArgumentException("Minimum nr of matchers must be positive");
+ }
+ if (nrScorers <= 1) {
+ throw new IllegalArgumentException("There must be at least 2 subScorers");
+ }
+
+ this.minimumNrMatchers = minimumNrMatchers;
+ this.subScorers = subScorers;
+
+ }
+
+ /** Construct a DisjunctionScorer, using one as the minimum number
+ * of matching subscorers.
+ * @throws IOException
+ */
+ public DisjunctionDISI(List subScorers) throws IOException {
+ this(subScorers, 1);
+ }
+
+ /** Called the first time next() or skipTo() is called to
+ * initialize scorerDocQueue.
+ */
+ private void initDISIQueue() throws IOException {
+ Iterator si = subScorers.iterator();
+ disiQueue = new DISIQueue(nrScorers);
+
+ while (si.hasNext()) {
+ DocIdSetIterator se = (DocIdSetIterator) si.next();
+ if (se.next()) { // doc() method will be used in scorerDocQueue.
+ disiQueue.insert(se);
+ }
+ }
+ }
+
+
+ public boolean next() throws IOException {
+ if(disiQueue==null) initDISIQueue();
+
+ return (disiQueue.size() >= minimumNrMatchers)
+ && advanceAfterCurrent();
+ }
+
+
+ /** Advance all subscorers after the current document determined by the
+ * top of the scorerDocQueue.
+ * Repeat until at least the minimum number of subscorers match on the same
+ * document and all subscorers are after that document or are exhausted.
+ *
On entry the scorerDocQueue has at least minimumNrMatchers
+ * available. At least the scorer with the minimum document number will be advanced.
+ * @return true iff there is a match.
+ *
In case there is a match, currentDoc, currentSumScore,
+ * and nrMatchers describe the match.
+ *
+ * @todo Investigate whether it is possible to use skipTo() when
+ * the minimum number of matchers is bigger than one, ie. try and use the
+ * character of ConjunctionScorer for the minimum number of matchers.
+ * Also delay calling score() on the sub scorers until the minimum number of
+ * matchers is reached.
+ *
For this, a DocIdSetIterator array with minimumNrMatchers elements might
+ * hold Scorers at currentDoc that are temporarily popped from scorerQueue.
+ */
+ protected boolean advanceAfterCurrent() throws IOException {
+ do { // repeat until minimum nr of matchers
+ currentDoc = disiQueue.topDoc();
+ nrMatchers = 1;
+ do { // Until all subscorers are after currentDoc
+ if (! disiQueue.topNextAndAdjustElsePop()) {
+ if (disiQueue.size()== 0) {
+ break; // nothing more to advance, check for last match.
+ }
+ }
+ if (disiQueue.topDoc() != currentDoc) {
+ break; // All remaining subscorers are after currentDoc.
+ }
+ nrMatchers++;
+ } while (true);
+
+ if (nrMatchers >= minimumNrMatchers) {
+ return true;
+ } else if (disiQueue.size() < minimumNrMatchers) {
+ return false;
+ }
+ } while (true);
+ }
+
+
+ public int doc() { return currentDoc; }
+
+ /** Returns the number of subscorers matching the current document.
+ * Initially invalid, until {@link #next()} is called the first time.
+ */
+ public int nrMatchers() {
+ return nrMatchers;
+ }
+
+ /** Skips to the first match beyond the current whose document number is
+ * greater than or equal to a given target.
+ *
When this method is used the {@link #explain(int)} method should not be used.
+ *
The implementation uses the skipTo() method on the subscorers.
+ * @param target The target document number.
+ * @return true iff there is such a match.
+ */
+ public boolean skipTo(int target) throws IOException {
+ if(disiQueue==null) initDISIQueue();
+
+ if (disiQueue.size() < minimumNrMatchers) {
+ return false;
+ }
+ if (target <= currentDoc) {
+ return true;
+ }
+ do {
+ if (disiQueue.topDoc() >= target) {
+ return advanceAfterCurrent();
+ } else if (! disiQueue.topSkipToAndAdjustElsePop(target)) {
+ if (disiQueue.size() < minimumNrMatchers) {
+ return false;
+ }
+ }
+ } while (true);
+ }
+
+
+}
Index: src/java/org/apache/lucene/util/DISIQueue.java
===================================================================
--- src/java/org/apache/lucene/util/DISIQueue.java (revision 0)
+++ src/java/org/apache/lucene/util/DISIQueue.java (revision 0)
@@ -0,0 +1,196 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A DISIQueue maintains a partial ordering of its DocIdSetIterators such that
+ * the least one can always be found in constant time. put()'s and pop()'s
+ * require log(size) time. The ordering is by {@link DocIdSetIterator#doc()}.
+ */
+public class DISIQueue {
+  /** Binary heap stored from index 1 on; allocated with maxSize + 1 slots. */
+  private final DocIdSetIterator[] heap;
+  private final int maxSize;
+  private int size;
+
+  /** Same as heap[1] (null while the queue is empty), kept only for speed. */
+  private DocIdSetIterator topDISI;
+
+  /**
+   * Creates an empty DISIQueue with a maximum size.
+   * @param maxSize the maximum number of iterators the queue can hold.
+   * @throws IllegalArgumentException if maxSize is negative.
+   */
+  public DISIQueue(int maxSize) {
+    if (maxSize < 0) {
+      throw new IllegalArgumentException("maxSize must not be negative: " + maxSize);
+    }
+    size = 0;
+    heap = new DocIdSetIterator[maxSize + 1];
+    this.maxSize = maxSize;
+    // Do not read heap[1] here: that slot does not exist when maxSize is 0.
+    topDISI = null;
+  }
+
+  /**
+   * Adds a DocIdSetIterator to the queue in log(size) time.
+   * If one tries to add more iterators than maxSize
+   * a RuntimeException (ArrayIndexOutOfBound) is thrown.
+   * @param disi the iterator to add.
+   */
+  public final void put(DocIdSetIterator disi) {
+    heap[size + 1] = disi; // throws before size is changed when the queue is full
+    size++;
+    upHeap();
+  }
+
+  /**
+   * Adds a DocIdSetIterator to the queue in log(size) time if either the queue
+   * is not full, or disi.doc() is not less than the doc() of the top iterator,
+   * in which case disi replaces the top iterator.
+   * @param disi the iterator to add.
+   * @return true if disi is added, false otherwise.
+   */
+  public boolean insert(DocIdSetIterator disi) {
+    if (size < maxSize) {
+      put(disi);
+      return true;
+    }
+    if (size > 0 && disi.doc() >= topDISI.doc()) { // heap[1] is top()
+      heap[1] = disi;
+      downHeap();
+      return true;
+    }
+    return false;
+  }
+
+  /** Returns the least iterator in constant time. Should not be used when empty. */
+  public final DocIdSetIterator top() {
+    return topDISI;
+  }
+
+  /** Returns doc() of the least iterator. Should not be used when empty. */
+  public final int topDoc() {
+    return topDISI.doc();
+  }
+
+  /** Returns next() of the top iterator, which is re-sorted on true, removed on false. */
+  public final boolean topNextAndAdjustElsePop() throws IOException {
+    return checkAdjustElsePop(topDISI.next());
+  }
+
+  /** Returns skipTo(target) of the top iterator, which is re-sorted on true, removed on false. */
+  public final boolean topSkipToAndAdjustElsePop(int target) throws IOException {
+    return checkAdjustElsePop(topDISI.skipTo(target));
+  }
+
+  /** Re-sorts the top iterator when cond is true, pops it otherwise; returns cond. */
+  private boolean checkAdjustElsePop(boolean cond) {
+    if (cond) {
+      downHeap();
+    } else {
+      popNoResult();
+    }
+    return cond;
+  }
+
+  /** Removes and returns the least iterator in log(size) time; not for an empty queue. */
+  public final DocIdSetIterator pop() {
+    DocIdSetIterator result = topDISI;
+    popNoResult();
+    return result;
+  }
+
+  /**
+   * Removes the least iterator of the queue in log(size) time.
+   * Should not be used when the queue is empty.
+   */
+  private void popNoResult() {
+    heap[1] = heap[size]; // move last to first
+    heap[size] = null;
+    size--;
+    downHeap(); // adjust heap
+  }
+
+  /**
+   * Should be called when the iterator at top changes doc() value.
+   * Still log(n) worst case, but it's at least twice as fast to
+   * { pq.top().change(); pq.adjustTop(); }
+   * instead of
+   * { o = pq.pop(); o.change(); pq.put(o); }
+   */
+  public final void adjustTop() {
+    downHeap();
+  }
+
+  /** Returns the number of iterators currently stored in the queue. */
+  public final int size() {
+    return size;
+  }
+
+  /** Removes all entries from the queue. */
+  public final void clear() {
+    for (int i = 0; i <= size; i++) {
+      heap[i] = null;
+    }
+    size = 0;
+    topDISI = null; // otherwise top() would still return the old top iterator
+  }
+
+  /** Moves the last heap entry up to its place and refreshes topDISI. */
+  private void upHeap() {
+    int i = size;
+    DocIdSetIterator node = heap[i]; // save bottom node
+    int j = i >>> 1;
+    while ((j > 0) && (node.doc() < heap[j].doc())) {
+      heap[i] = heap[j]; // shift parents down
+      i = j;
+      j = j >>> 1;
+    }
+    heap[i] = node; // install saved node
+    topDISI = heap[1];
+  }
+
+  /** Moves heap[1] down to its place and refreshes topDISI. */
+  private void downHeap() {
+    int i = 1;
+    DocIdSetIterator node = heap[i]; // save top node
+    int j = i << 1; // find smaller child
+    int k = j + 1;
+    if ((k <= size) && (heap[k].doc() < heap[j].doc())) {
+      j = k;
+    }
+    while ((j <= size) && (heap[j].doc() < node.doc())) {
+      heap[i] = heap[j]; // shift up child
+      i = j;
+      j = i << 1;
+      k = j + 1;
+      if ((k <= size) && (heap[k].doc() < heap[j].doc())) {
+        j = k;
+      }
+    }
+    heap[i] = node; // install saved node
+    topDISI = heap[1];
+  }
+}
Index: src/test/org/apache/lucene/search/TestDisjunctionDISI.java
===================================================================
--- src/test/org/apache/lucene/search/TestDisjunctionDISI.java (revision 0)
+++ src/test/org/apache/lucene/search/TestDisjunctionDISI.java (revision 0)
@@ -0,0 +1,107 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Random;
+
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.DocIdBitSet;
+
+public class TestDisjunctionDISI extends TestCase {
+ /** Main for running test case by itself. */
+ public static void main(String args[]) {
+ TestRunner.run(new TestSuite(TestDisjunctionDISI.class));
+ }
+
+ static Random rand = new Random();
+ static final int MAX_BIT_SET_SIZE = 1;
+
+ public void testDisjunction() throws IOException{
+ for(int iter = 0; iter < 1000 ; iter++){
+ BitSet bs1 = generateBitSet(rand.nextInt(MAX_BIT_SET_SIZE));
+ BitSet bs2 = generateBitSet(rand.nextInt(MAX_BIT_SET_SIZE));
+
+ //Test minimum Nr should match == 1
+ BitSet result = new BitSet();
+ result.or(bs1); result.or(bs2);
+
+ List disis = new ArrayList();
+ disis.add((new DocIdBitSet(bs1)).iterator());
+ disis.add((new DocIdBitSet(bs2)).iterator());
+
+ DisjunctionDISI disjunction = new DisjunctionDISI(disis,1);
+
+ assertTrue(disjunction.doc()==-1);
+ assertTrue(validateViaNext(result, disjunction));
+
+ disjunction = new DisjunctionDISI(disis,1);//no restart()...
+ assertTrue(validateViaSkipTo(result, disjunction));
+
+
+ //Test minimumNrShouldMatch(2);
+ result = new BitSet();
+ result.or(bs1); result.and(bs2);
+ disjunction = new DisjunctionDISI(disis,2);
+
+ assertTrue(disjunction.doc()==-1);
+ assertTrue(validateViaNext(result, disjunction));
+
+ disjunction = new DisjunctionDISI(disis,2);//no restart()...
+ assertTrue(validateViaSkipTo(result, disjunction));
+
+ }
+ }
+
+
+ private BitSet generateBitSet(int maxSize){
+ BitSet bs = new BitSet();
+ for(int i =0; i