ConjunctionScorer.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
+
class DisjunctionSumScorer extends Scorer {
- /** The number of subscorers. */
- private final int nrScorers;
-
- /** The subscorers. */
- protected final List
- * After each to call to nextDoc() or advance()
- * currentSumScore is the total score of the current matching doc,
- * nrMatchers is the number of matching scorers,
- * and all scorers are after the matching doc, or are exhausted.
- */
- private ScorerDocQueue scorerDocQueue;
-
- /** The document number of the current match. */
- private int currentDoc = -1;
+ private int numScorers;
+ private int doc = -1;
+ protected int nrMatchers;
+ private float score;
- /** The number of subscorers that provide the current match. */
- protected int nrMatchers = -1;
-
- private float currentScore = Float.NaN;
-
- /** Construct a DisjunctionScorer.
- * @param subScorers A collection of at least two subscorers.
- * @param minimumNrMatchers The positive minimum number of subscorers that should
- * match to match this query.
- *
When minimumNrMatchers is bigger than
- * the number of subScorers,
- * no matches will be produced.
- *
When minimumNrMatchers equals the number of subScorers,
- * it more efficient to use ConjunctionScorer.
- */
- public DisjunctionSumScorer( ListDisjunctionScorer, using one as the minimum number
- * of matching subscorers.
- */
- public DisjunctionSumScorer(ListscorerDocQueue.
+ /**
+ * Moves to the next document that is matched by enough subscorers.
+ * Every subscorer positioned on the current doc is stepped forward; the new heap root
+ * becomes the candidate. When minimumNrMatchers is greater than one the candidate is
+ * accepted only if at least that many subscorers sit on it.
+ * @return the new current document, or NO_MORE_DOCS when no further match exists
+ */
+ @Override
+ public int nextDoc() throws IOException {
+ // An empty heap has no root to inspect; advance() applies the same guard.
+ if (numScorers == 0) {
+ return doc = NO_MORE_DOCS;
+ }
+ while(true) {
+ // Step every subscorer that is still on the current doc.
+ while (subScorers[0].docID() == doc) {
+ if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
+ heapAdjust(0);
+ } else {
+ heapRemoveRoot();
+ // Too few live subscorers remain for any document to qualify.
+ if (numScorers < minimumNrMatchers) {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+ doc = subScorers[0].docID();
+ if (minimumNrMatchers > 1) {
+ // The root counts as one matcher; the two subtrees below it supply the rest.
+ nrMatchers = 1;
+ countMatches(1);
+ countMatches(2);
+ if (nrMatchers >= minimumNrMatchers) {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+
+ return doc;
+ }
+
+ private void countMatches(int root) throws IOException {
+ if (root < numScorers && subScorers[root].docID() == doc) {
+ nrMatchers++;
+ countMatches((root<<1)+1);
+ countMatches((root<<1)+2);
+ }
+ }
+ // Returns the number of subscorers counted on the current doc.
+ // score() always recomputes this value; nextDoc() and advance() only compute it
+ // when minimumNrMatchers > 1, so otherwise it is only valid after score() has been invoked.
+ public int nrMatchers() {
+ return nrMatchers;
+ }
+
+ // Returns the current document: -1 before the first nextDoc()/advance() call,
+ // NO_MORE_DOCS once either of them has reported exhaustion.
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ /** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time.
+ * @return the score of the current generated document
*/
- private void initScorerDocQueue() throws IOException {
- scorerDocQueue = new ScorerDocQueue(nrScorers);
- for (Scorer se : subScorers) {
- if (se.nextDoc() != NO_MORE_DOCS) {
- scorerDocQueue.insert(se);
+ @Override
+ public float score() throws IOException {
+ // nextDoc()/advance() set doc from the heap root, so the root seeds both the
+ // matcher count and the score sum.
+ nrMatchers = 1;
+ score = subScorers[0].score();
+ // Add the contributions found in the root's left and right subtrees.
+ scoreAll(1);
+ scoreAll(2);
+ return score;
+ }
+
+ // Recursively visit the heap subtree at root: every subScorer positioned on the current doc
+ // adds its score to the running sum and is counted in nrMatchers.
+ private void scoreAll(int root) throws IOException {
+ // The descent stops at the end of the heap or at the first node that is not on doc.
+ if (root < numScorers && subScorers[root].docID() == doc) {
+ nrMatchers++;
+ score += subScorers[root].score();
+ scoreAll((root<<1)+1);
+ scoreAll((root<<1)+2);
+ }
+ }
+
+ /**
+ * Advances to the first document at or beyond target that is matched by enough subscorers.
+ * Subscorers behind target are advanced one by one from the heap root; exhausted ones
+ * are dropped from the heap.
+ * @param target the smallest acceptable document number
+ * @return the new current document, or NO_MORE_DOCS when no such document exists
+ */
+ @Override
+ public int advance(int target) throws IOException {
+ if (numScorers == 0) return doc = NO_MORE_DOCS;
+ while (subScorers[0].docID() < target) {
+ if (subScorers[0].advance(target) != NO_MORE_DOCS) {
+ heapAdjust(0);
+ } else {
+ heapRemoveRoot();
+ if (numScorers == 0) {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+
+ // Publish the candidate before counting: countMatches() compares every subscorer
+ // against doc, so counting against the previous doc would give a wrong total.
+ doc = subScorers[0].docID();
+ if (minimumNrMatchers > 1) {
+ nrMatchers = 1;
+ countMatches(1);
+ countMatches(2);
+ if (nrMatchers < minimumNrMatchers) {
+ // Too few subscorers on the candidate; nextDoc() steps past it and keeps
+ // searching, leaving doc and nrMatchers describing the match it finds.
+ return nextDoc();
+ }
+ }
+
+ return doc;
+ }
+
+ // Arrange subScorers as a min heap keyed on docID, so the scorer on the earliest document is at index 0.
+ // Works bottom-up: every node that has at least one child is sifted down, last parent first.
+ private void heapify() {
+ int parent = (numScorers >> 1) - 1;
+ while (parent >= 0) {
+ heapAdjust(parent);
+ parent--;
+ }
+ }
+
+ /* The subtree of subScorers at root is a min heap except possibly for its root element.
+ * Bubble the root down as required to make the subtree a heap.
+ * The scorer being sifted is swapped with its smaller child until neither child
+ * is on an earlier document or a leaf position is reached.
+ */
+ private void heapAdjust(int root) {
+ Scorer scorer = subScorers[root];
+ int doc = scorer.docID();
+ int i = root;
+ // Index of the last node that has a left child; beyond it every node is a leaf.
+ final int limit = (numScorers >> 1) - 1;
+ while (i <= limit) {
+ int lchild = (i << 1) + 1;
+ Scorer lscorer = subScorers[lchild];
+ int ldoc = lscorer.docID();
+ // A missing right child is treated as Integer.MAX_VALUE so it never wins a comparison.
+ int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2;
+ Scorer rscorer = null;
+ if (rchild < numScorers) {
+ rscorer = subScorers[rchild];
+ rdoc = rscorer.docID();
+ }
+ if (ldoc < doc) {
+ // Left child is earlier than the sifted scorer; swap with whichever child is earliest.
+ if (rdoc < ldoc) {
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ } else {
+ subScorers[i] = lscorer;
+ subScorers[lchild] = scorer;
+ i = lchild;
+ }
+ } else if (rdoc < doc) {
+ // Only the right child is earlier.
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ } else {
+ // Neither child is earlier: the heap property holds again.
+ return;
}
}
}
- /** Scores and collects all matching documents.
- * @param collector The collector to which all matching documents are passed through.
- */
- @Override
- public void score(Collector collector) throws IOException {
- collector.setScorer(this);
- while (nextDoc() != NO_MORE_DOCS) {
- collector.collect(currentDoc);
+ // Remove the root Scorer from subScorers and re-establish it as a heap.
+ // The last heap element takes the root's place and is sifted down; the vacated
+ // slot is set to null.
+ private void heapRemoveRoot() {
+ if (numScorers == 1) {
+ // Removing the only element leaves an empty heap; there is nothing to sift.
+ subScorers[0] = null;
+ numScorers = 0;
+ } else {
+ subScorers[0] = subScorers[numScorers - 1];
+ subScorers[numScorers - 1] = null;
+ --numScorers;
+ heapAdjust(0);
}
}
- /** Expert: Collects matching documents in a range. Hook for optimization.
- * Note that {@link #nextDoc()} must be called once before this method is called
- * for the first time.
- * @param collector The collector to which all matching documents are passed through.
- * @param max Do not score documents past this.
- * @return true if more matching documents may remain.
- */
- @Override
- protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
- // firstDocID is ignored since nextDoc() sets 'currentDoc'
- collector.setScorer(this);
- while (currentDoc < max) {
- collector.collect(currentDoc);
- if (nextDoc() == NO_MORE_DOCS) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) {
- currentDoc = NO_MORE_DOCS;
- }
- return currentDoc;
- }
-
- /** Advance all subscorers after the current document determined by the
- * top of the scorerDocQueue.
- * Repeat until at least the minimum number of subscorers match on the same
- * document and all subscorers are after that document or are exhausted.
- *
On entry the scorerDocQueue has at least minimumNrMatchers
- * available. At least the scorer with the minimum document number will be advanced.
- * @return true iff there is a match.
- *
In case there is a match, currentDoc, currentSumScore,
- * and nrMatchers describe the match.
- *
- * TODO: Investigate whether it is possible to use advance() when
- * the minimum number of matchers is bigger than one, ie. try and use the
- * character of ConjunctionScorer for the minimum number of matchers.
- * Also delay calling score() on the sub scorers until the minimum number of
- * matchers is reached.
- *
For this, a Scorer array with minimumNrMatchers elements might
- * hold Scorers at currentDoc that are temporarily popped from scorerQueue.
- */
- protected boolean advanceAfterCurrent() throws IOException {
- do { // repeat until minimum nr of matchers
- currentDoc = scorerDocQueue.topDoc();
- currentScore = scorerDocQueue.topScore();
- nrMatchers = 1;
- do { // Until all subscorers are after currentDoc
- if (!scorerDocQueue.topNextAndAdjustElsePop()) {
- if (scorerDocQueue.size() == 0) {
- break; // nothing more to advance, check for last match.
- }
- }
- if (scorerDocQueue.topDoc() != currentDoc) {
- break; // All remaining subscorers are after currentDoc.
- }
- currentScore += scorerDocQueue.topScore();
- nrMatchers++;
- } while (true);
-
- if (nrMatchers >= minimumNrMatchers) {
- return true;
- } else if (scorerDocQueue.size() < minimumNrMatchers) {
- return false;
- }
- } while (true);
- }
-
- /** Returns the score of the current document matching the query.
- * Initially invalid, until {@link #nextDoc()} is called the first time.
- */
- @Override
- public float score() throws IOException { return currentScore; }
-
- @Override
- public int docID() {
- return currentDoc;
- }
-
- /** Returns the number of subscorers matching the current document.
- * Initially invalid, until {@link #nextDoc()} is called the first time.
- */
- public int nrMatchers() {
- return nrMatchers;
- }
-
- /**
- * Advances to the first match beyond the current whose document number is
- * greater than or equal to a given target.
- * The implementation uses the advance() method on the subscorers.
- *
- * @param target
- * The target document number.
- * @return the document whose number is greater than or equal to the given
- * target, or -1 if none exist.
- */
- @Override
- public int advance(int target) throws IOException {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
- }
- if (target <= currentDoc) {
- return currentDoc;
- }
- do {
- if (scorerDocQueue.topDoc() >= target) {
- return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS);
- } else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
- }
- }
- } while (true);
- }
}
diff -r e1577bda921e lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java
--- a/lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java Mon Oct 11 09:43:06 2010 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,219 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */
-
-import java.io.IOException;
-
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Scorer;
-
-/** A ScorerDocQueue maintains a partial ordering of its Scorers such that the
- least Scorer can always be found in constant time. Put()'s and pop()'s
- require log(size) time. The ordering is by Scorer.doc().
- *
- * @lucene.internal
- */
-public class ScorerDocQueue { // later: SpansQueue for spans with doc and term positions
- private final HeapedScorerDoc[] heap;
- private final int maxSize;
- private int size;
-
- private class HeapedScorerDoc {
- Scorer scorer;
- int doc;
-
- HeapedScorerDoc(Scorer s) { this(s, s.docID()); }
-
- HeapedScorerDoc(Scorer scorer, int doc) {
- this.scorer = scorer;
- this.doc = doc;
- }
-
- void adjust() { doc = scorer.docID(); }
- }
-
- private HeapedScorerDoc topHSD; // same as heap[1], only for speed
-
- /** Create a ScorerDocQueue with a maximum size. */
- public ScorerDocQueue(int maxSize) {
- // assert maxSize >= 0;
- size = 0;
- int heapSize = maxSize + 1;
- heap = new HeapedScorerDoc[heapSize];
- this.maxSize = maxSize;
- topHSD = heap[1]; // initially null
- }
-
- /**
- * Adds a Scorer to a ScorerDocQueue in log(size) time.
- * If one tries to add more Scorers than maxSize
- * a RuntimeException (ArrayIndexOutOfBound) is thrown.
- */
- public final void put(Scorer scorer) {
- size++;
- heap[size] = new HeapedScorerDoc(scorer);
- upHeap();
- }
-
- /**
- * Adds a Scorer to the ScorerDocQueue in log(size) time if either
- * the ScorerDocQueue is not full, or not lessThan(scorer, top()).
- * @param scorer
- * @return true if scorer is added, false otherwise.
- */
- public boolean insert(Scorer scorer){
- if (size < maxSize) {
- put(scorer);
- return true;
- } else {
- int docNr = scorer.docID();
- if ((size > 0) && (! (docNr < topHSD.doc))) { // heap[1] is top()
- heap[1] = new HeapedScorerDoc(scorer, docNr);
- downHeap();
- return true;
- } else {
- return false;
- }
- }
- }
-
- /** Returns the least Scorer of the ScorerDocQueue in constant time.
- * Should not be used when the queue is empty.
- */
- public final Scorer top() {
- // assert size > 0;
- return topHSD.scorer;
- }
-
- /** Returns document number of the least Scorer of the ScorerDocQueue
- * in constant time.
- * Should not be used when the queue is empty.
- */
- public final int topDoc() {
- // assert size > 0;
- return topHSD.doc;
- }
-
- public final float topScore() throws IOException {
- // assert size > 0;
- return topHSD.scorer.score();
- }
-
- public final boolean topNextAndAdjustElsePop() throws IOException {
- return checkAdjustElsePop(topHSD.scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- }
-
- public final boolean topSkipToAndAdjustElsePop(int target) throws IOException {
- return checkAdjustElsePop(topHSD.scorer.advance(target) != DocIdSetIterator.NO_MORE_DOCS);
- }
-
- private boolean checkAdjustElsePop(boolean cond) {
- if (cond) { // see also adjustTop
- topHSD.doc = topHSD.scorer.docID();
- } else { // see also popNoResult
- heap[1] = heap[size]; // move last to first
- heap[size] = null;
- size--;
- }
- downHeap();
- return cond;
- }
-
- /** Removes and returns the least scorer of the ScorerDocQueue in log(size)
- * time.
- * Should not be used when the queue is empty.
- */
- public final Scorer pop() {
- // assert size > 0;
- Scorer result = topHSD.scorer;
- popNoResult();
- return result;
- }
-
- /** Removes the least scorer of the ScorerDocQueue in log(size) time.
- * Should not be used when the queue is empty.
- */
- private final void popNoResult() {
- heap[1] = heap[size]; // move last to first
- heap[size] = null;
- size--;
- downHeap(); // adjust heap
- }
-
- /** Should be called when the scorer at top changes doc() value.
- * Still log(n) worst case, but it's at least twice as fast to
- * { pq.top().change(); pq.adjustTop(); }
- * instead of
- * { o = pq.pop(); o.change(); pq.push(o); }
- *
- */
- public final void adjustTop() {
- // assert size > 0;
- topHSD.adjust();
- downHeap();
- }
-
- /** Returns the number of scorers currently stored in the ScorerDocQueue. */
- public final int size() {
- return size;
- }
-
- /** Removes all entries from the ScorerDocQueue. */
- public final void clear() {
- for (int i = 0; i <= size; i++) {
- heap[i] = null;
- }
- size = 0;
- }
-
- private final void upHeap() {
- int i = size;
- HeapedScorerDoc node = heap[i]; // save bottom node
- int j = i >>> 1;
- while ((j > 0) && (node.doc < heap[j].doc)) {
- heap[i] = heap[j]; // shift parents down
- i = j;
- j = j >>> 1;
- }
- heap[i] = node; // install saved node
- topHSD = heap[1];
- }
-
- private final void downHeap() {
- int i = 1;
- HeapedScorerDoc node = heap[i]; // save top node
- int j = i << 1; // find smaller child
- int k = j + 1;
- if ((k <= size) && (heap[k].doc < heap[j].doc)) {
- j = k;
- }
- while ((j <= size) && (heap[j].doc < node.doc)) {
- heap[i] = heap[j]; // shift up child
- i = j;
- j = i << 1;
- k = j + 1;
- if (k <= size && (heap[k].doc < heap[j].doc)) {
- j = k;
- }
- }
- heap[i] = node; // install saved node
- topHSD = heap[1];
- }
-}
diff -r e1577bda921e lucene/src/test/org/apache/lucene/search/TestBooleanQueryVisitScorers.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/test/org/apache/lucene/search/TestBooleanQueryVisitScorers.java Mon Oct 11 10:58:34 2010 -0400
@@ -0,0 +1,184 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.*;
+import org.apache.lucene.util.*;
+import org.apache.lucene.store.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.search.BooleanClause.Occur;
+import java.io.*;
+import java.util.*;
+
+public class TestBooleanQueryVisitScorers extends LuceneTestCase {
+
+ Directory dir = new RAMDirectory();
+ String F1 = "title";
+ String F2 = "body";
+
+ /** Builds the index, runs the search check, and always closes the directory afterwards. */
+ public void test() throws IOException {
+ try {
+ makeIndex();
+ searchIndex();
+ } finally {
+ // Close the directory even when indexing or an assertion in the search fails.
+ dir.close();
+ }
+ }
+
+ // Replaces dir with a fresh directory from newDirectory() and indexes three documents,
+ // each with a title (F1) and a body (F2) value.
+ void makeIndex() throws IOException {
+ dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+ //writer.addDocument(doc("lucene", "lucene is a very popular search engine library. lucene runs overall in the world. lucene is great!"));
+ writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
+ writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
+ writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
+ writer.close();
+ }
+
+ // Builds a Document holding v1 under F1 and v2 under F2; a null value leaves its field out.
+ Document doc(String v1, String v2) {
+ Document result = new Document();
+ if (v1 != null) {
+ result.add(field(F1, v1));
+ }
+ if (v2 != null) {
+ result.add(field(F2, v2));
+ }
+ return result;
+ }
+
+ // Creates a Field for the given name and value using Store.YES and Index.ANALYZED.
+ Fieldable field(String field, String value) {
+ return new Field(field, value, Store.YES, Index.ANALYZED);
+ }
+
+ // Opens a searcher on dir and checks a three-term SHOULD query over the title and body fields.
+ void searchIndex() throws IOException {
+ IndexSearcher searcher = new IndexSearcher(dir);
+ try {
+ checkResult(searcher, query(new Term(F1, "lucene"), new Term(F2, "lucene"), new Term(F2, "search")));
+ } finally {
+ // Release the searcher even when checkResult fails an assertion.
+ searcher.close();
+ }
+ }
+
+ // Builds a query from the given terms: a plain TermQuery for a single term,
+ // otherwise a BooleanQuery with one SHOULD clause per term.
+ // Throws IllegalArgumentException when no terms are given.
+ Query query(Term... ts) {
+ if (ts == null || ts.length == 0) {
+ throw new IllegalArgumentException();
+ }
+ if (ts.length == 1) {
+ return new TermQuery(ts[0]);
+ }
+ BooleanQuery disjunction = new BooleanQuery();
+ for (int i = 0; i < ts.length; i++) {
+ disjunction.add(new TermQuery(ts[i]), Occur.SHOULD);
+ }
+ return disjunction;
+ }
+
+ // Runs the query through a MyCollector and asserts that collector.freq(doc) is
+ // greater than zero for every hit returned by collector.topDocs().
+ void checkResult(IndexSearcher searcher, Query query) throws IOException {
+ MyCollector collector = new MyCollector();
+ searcher.search(query, collector);
+ TopDocs docs = collector.topDocs();
+ for (ScoreDoc scoreDoc : docs.scoreDocs) {
+ //Document doc = searcher.doc(scoreDoc.doc);
+ //float score = scoreDoc.score;
+ //System.out.println(score + " : " + doc.get(F1) + " / " + doc.get(F2));
+ //System.out.println(" freq : " + collector.freq(scoreDoc.doc));
+ assertTrue(collector.freq(scoreDoc.doc) > 0);
+ }
+ }
+
+ static class MyCollector extends Collector {
+
+ private TopDocsCollector