ConjunctionScorer.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
+
class DisjunctionSumScorer extends Scorer {
- /** The number of subscorers. */
- private final int nrScorers;
-
- /** The subscorers. */
- protected final List
- * After each to call to nextDoc() or advance()
- * currentSumScore is the total score of the current matching doc,
- * nrMatchers is the number of matching scorers,
- * and all scorers are after the matching doc, or are exhausted.
- */
- private ScorerDocQueue scorerDocQueue;
-
- /** The document number of the current match. */
- private int currentDoc = -1;
+ private int numScorers; // number of live entries at the front of subScorers; decremented by heapRemoveRoot()
+ private int doc = -1; // current doc; -1 until nextDoc()/advance() assigns it
+ protected int nrMatchers; // subscorers counted on the current doc; not recomputed on every path (see nrMatchers())
+ private float score; // sum accumulated by the most recent score() call
- /** The number of subscorers that provide the current match. */
- protected int nrMatchers = -1;
-
- private float currentScore = Float.NaN;
-
- /** Construct a DisjunctionScorer.
- * @param subScorers A collection of at least two subscorers.
- * @param minimumNrMatchers The positive minimum number of subscorers that should
- * match to match this query.
- *
When minimumNrMatchers is bigger than
- * the number of subScorers,
- * no matches will be produced.
- *
When minimumNrMatchers equals the number of subScorers,
- * it more efficient to use ConjunctionScorer.
- */
- public DisjunctionSumScorer( ListDisjunctionScorer, using one as the minimum number
- * of matching subscorers.
- */
- public DisjunctionSumScorer(ListscorerDocQueue.
+ @Override
+ public int nextDoc() throws IOException { // moves to the next doc on which at least minimumNrMatchers subscorers are positioned, or returns NO_MORE_DOCS
+ while(true) {
+ while (subScorers[0].docID() == doc) { // step every subscorer that is still on the current doc; each one surfaces at the heap root in turn
+ if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
+ heapAdjust(0); // root moved to a later doc: sift it down to restore heap order
+ } else {
+ heapRemoveRoot(); // root is exhausted: drop it from the heap
+ if (numScorers < minimumNrMatchers) { // too few subscorers remain to ever reach the minimum
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+ doc = subScorers[0].docID(); // candidate: the doc of the heap root
+ if (minimumNrMatchers > 1) {
+ nrMatchers = 1; // the root itself
+ countMatches(1); // left subtree of the root
+ countMatches(2); // right subtree of the root
+ if (nrMatchers >= minimumNrMatchers) {
+ break;
+ }
+ } else {
+ break; // one matcher suffices; nrMatchers is not recomputed on this path
+ }
+ }
+
+ return doc;
+ }
+
+ private void countMatches(int root) throws IOException { // adds to nrMatchers the subscorers on doc in the heap subtree at index root, descending only through nodes that are themselves on doc
+ if (root < numScorers && subScorers[root].docID() == doc) { // stop past the end of the heap or at a node that is not on doc
+ nrMatchers++;
+ countMatches((root<<1)+1); // left child
+ countMatches((root<<1)+2); // right child
+ }
+ }
+ // Number of subscorers counted on the current doc. Only guaranteed valid after score() has been invoked: nextDoc() and advance() skip the count when minimumNrMatchers is 1.
+ public int nrMatchers() {
+ return nrMatchers;
+ }
+
+ @Override
+ public int docID() { // returns the doc field: -1 initially, NO_MORE_DOCS once nextDoc()/advance() have reported exhaustion
+ return doc;
+ }
+
+ /** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time.
+ * @return the score of the current generated document
*/
- private void initScorerDocQueue() throws IOException {
- scorerDocQueue = new ScorerDocQueue(nrScorers);
- for (Scorer se : subScorers) {
- if (se.nextDoc() != NO_MORE_DOCS) {
- scorerDocQueue.insert(se);
+ @Override
+ public float score() throws IOException { // sums the root's score with the scores of the other subscorers on doc, recounting nrMatchers as it goes
+ nrMatchers = 1; // the heap root
+ score = subScorers[0].score();
+ scoreAll(1); // left subtree of the root
+ scoreAll(2); // right subtree of the root
+ return score;
+ }
+
+ // Recursively visit the subScorers in the subtree at root that are positioned on doc, adding each one's score to the sum and counting it in nrMatchers
+ private void scoreAll(int root) throws IOException {
+ if (root < numScorers && subScorers[root].docID() == doc) { // stop past the end of the heap or at a node that is not on doc
+ nrMatchers++;
+ score += subScorers[root].score();
+ scoreAll((root<<1)+1); // left child
+ scoreAll((root<<1)+2); // right child
+ }
+ }
+
+ @Override
+ public int advance(int target) throws IOException { // positions this scorer on a doc >= target, or returns NO_MORE_DOCS
+ if (numScorers == 0) return doc = NO_MORE_DOCS; // every subscorer is already exhausted
+ while (subScorers[0].docID() < target) { // advance the heap root until even the smallest docID has reached target
+ if (subScorers[0].advance(target) != NO_MORE_DOCS) {
+ heapAdjust(0); // root moved: sift it down to restore heap order
+ } else {
+ heapRemoveRoot(); // root is exhausted: drop it from the heap
+ if (numScorers == 0) { // NOTE(review): nextDoc() gives up at numScorers < minimumNrMatchers; this check only stops at 0 — confirm intended
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+
+ if (minimumNrMatchers > 1) {
+ while(true) {
+ nrMatchers = 1; // NOTE(review): doc is not reassigned before this count, so after a real advance it appears to compare against the previous doc and rely on the nextDoc() call below — confirm
+ countMatches(1); // left subtree of the root
+ countMatches(2); // right subtree of the root
+ if (nrMatchers >= minimumNrMatchers) {
+ break;
+ }
+ if (nextDoc() == NO_MORE_DOCS) {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+ }
+
+ return doc = subScorers[0].docID();
+ }
+
+ // Organize subScorers into a min heap with scorers generating the earliest document on top.
+ private void heapify() {
+ for (int i = (numScorers >> 1) - 1; i >= 0; i--) { // from the last node that has a child back to the root
+ heapAdjust(i);
+ }
+ }
+
+ /* The subtree of subScorers at root is a min heap except possibly for its root element.
+ * Bubble the root down as required to make the subtree a heap.
+ */
+ private void heapAdjust(int root) {
+ Scorer scorer = subScorers[root]; // the element being sifted down
+ int doc = scorer.docID(); // local value; shadows the doc field
+ int i = root;
+ final int limit = (numScorers >> 1) - 1; // index of the last node that has a left child
+ while (i <= limit) {
+ int lchild = (i << 1) + 1;
+ Scorer lscorer = subScorers[lchild];
+ int ldoc = lscorer.docID();
+ int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2; // MAX_VALUE stands in for a missing right child so it never compares smaller
+ Scorer rscorer = null;
+ if (rchild < numScorers) {
+ rscorer = subScorers[rchild];
+ rdoc = rscorer.docID();
+ }
+ if (ldoc < doc) {
+ if (rdoc < ldoc) { // both children precede scorer and the right one is smaller: swap with the right child
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ } else { // left child is the smaller (or equal) one: swap with the left child
+ subScorers[i] = lscorer;
+ subScorers[lchild] = scorer;
+ i = lchild;
+ }
+ } else if (rdoc < doc) { // only the right child precedes scorer
+ subScorers[i] = rscorer;
+ subScorers[rchild] = scorer;
+ i = rchild;
+ } else {
+ return; // neither child precedes scorer: heap order holds
}
}
}
- /** Scores and collects all matching documents.
- * @param collector The collector to which all matching documents are passed through.
- */
- @Override
- public void score(Collector collector) throws IOException {
- collector.setScorer(this);
- while (nextDoc() != NO_MORE_DOCS) {
- collector.collect(currentDoc);
+ // Remove the root Scorer from subScorers and re-establish it as a heap
+ private void heapRemoveRoot() {
+ if (numScorers == 1) {
+ subScorers[0] = null; // the only remaining scorer is removed: the heap is now empty
+ numScorers = 0;
+ } else {
+ subScorers[0] = subScorers[numScorers - 1]; // move the last element into the root slot
+ subScorers[numScorers - 1] = null; // clear the vacated slot
+ --numScorers;
+ heapAdjust(0); // sift the moved element down to restore heap order
}
}
- /** Expert: Collects matching documents in a range. Hook for optimization.
- * Note that {@link #nextDoc()} must be called once before this method is called
- * for the first time.
- * @param collector The collector to which all matching documents are passed through.
- * @param max Do not score documents past this.
- * @return true if more matching documents may remain.
- */
- @Override
- protected boolean score(Collector collector, int max, int firstDocID) throws IOException {
- // firstDocID is ignored since nextDoc() sets 'currentDoc'
- collector.setScorer(this);
- while (currentDoc < max) {
- collector.collect(currentDoc);
- if (nextDoc() == NO_MORE_DOCS) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public int nextDoc() throws IOException {
- if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) {
- currentDoc = NO_MORE_DOCS;
- }
- return currentDoc;
- }
-
- /** Advance all subscorers after the current document determined by the
- * top of the scorerDocQueue.
- * Repeat until at least the minimum number of subscorers match on the same
- * document and all subscorers are after that document or are exhausted.
- *
On entry the scorerDocQueue has at least minimumNrMatchers
- * available. At least the scorer with the minimum document number will be advanced.
- * @return true iff there is a match.
- *
In case there is a match, currentDoc, currentSumScore,
- * and nrMatchers describe the match.
- *
- * TODO: Investigate whether it is possible to use advance() when
- * the minimum number of matchers is bigger than one, ie. try and use the
- * character of ConjunctionScorer for the minimum number of matchers.
- * Also delay calling score() on the sub scorers until the minimum number of
- * matchers is reached.
- *
For this, a Scorer array with minimumNrMatchers elements might
- * hold Scorers at currentDoc that are temporarily popped from scorerQueue.
- */
- protected boolean advanceAfterCurrent() throws IOException {
- do { // repeat until minimum nr of matchers
- currentDoc = scorerDocQueue.topDoc();
- currentScore = scorerDocQueue.topScore();
- nrMatchers = 1;
- do { // Until all subscorers are after currentDoc
- if (!scorerDocQueue.topNextAndAdjustElsePop()) {
- if (scorerDocQueue.size() == 0) {
- break; // nothing more to advance, check for last match.
- }
- }
- if (scorerDocQueue.topDoc() != currentDoc) {
- break; // All remaining subscorers are after currentDoc.
- }
- currentScore += scorerDocQueue.topScore();
- nrMatchers++;
- } while (true);
-
- if (nrMatchers >= minimumNrMatchers) {
- return true;
- } else if (scorerDocQueue.size() < minimumNrMatchers) {
- return false;
- }
- } while (true);
- }
-
- /** Returns the score of the current document matching the query.
- * Initially invalid, until {@link #nextDoc()} is called the first time.
- */
- @Override
- public float score() throws IOException { return currentScore; }
-
- @Override
- public int docID() {
- return currentDoc;
- }
-
- /** Returns the number of subscorers matching the current document.
- * Initially invalid, until {@link #nextDoc()} is called the first time.
- */
- public int nrMatchers() {
- return nrMatchers;
- }
-
- /**
- * Advances to the first match beyond the current whose document number is
- * greater than or equal to a given target.
- * The implementation uses the advance() method on the subscorers.
- *
- * @param target
- * The target document number.
- * @return the document whose number is greater than or equal to the given
- * target, or -1 if none exist.
- */
- @Override
- public int advance(int target) throws IOException {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
- }
- if (target <= currentDoc) {
- return currentDoc;
- }
- do {
- if (scorerDocQueue.topDoc() >= target) {
- return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS);
- } else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
- }
- }
- } while (true);
- }
}
diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
--- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Sat Oct 09 08:51:15 2010 -0400
+++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Sat Oct 09 08:51:39 2010 -0400
@@ -512,7 +512,7 @@
final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit);
collectTerms(reader, query, col);
-
+
if (col.hasCutOff) {
return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
} else if (col.termCount == 0) {
@@ -529,7 +529,9 @@
bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD);
}
// Strip scores
- final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ // nocommit
+ //final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+ final Query result = bq;
result.setBoost(query.getBoost());
query.incTotalNumberOfTerms(col.termCount);
return result;
@@ -549,7 +551,9 @@
public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException {
termCount++;
- if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) {
+ // nocommit
+ //if (termCount >= termCountLimit || (false && (docVisitCount >= docCountCutoff))) {
+ if (termCount >= 1024) {
hasCutOff = true;
return false;
}
diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java
--- a/lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java Sat Oct 09 08:51:15 2010 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,219 +0,0 @@
-package org.apache.lucene.util;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */
-
-import java.io.IOException;
-
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Scorer;
-
-/** A ScorerDocQueue maintains a partial ordering of its Scorers such that the
- least Scorer can always be found in constant time. Put()'s and pop()'s
- require log(size) time. The ordering is by Scorer.doc().
- *
- * @lucene.internal
- */
-public class ScorerDocQueue { // later: SpansQueue for spans with doc and term positions
- private final HeapedScorerDoc[] heap;
- private final int maxSize;
- private int size;
-
- private class HeapedScorerDoc {
- Scorer scorer;
- int doc;
-
- HeapedScorerDoc(Scorer s) { this(s, s.docID()); }
-
- HeapedScorerDoc(Scorer scorer, int doc) {
- this.scorer = scorer;
- this.doc = doc;
- }
-
- void adjust() { doc = scorer.docID(); }
- }
-
- private HeapedScorerDoc topHSD; // same as heap[1], only for speed
-
- /** Create a ScorerDocQueue with a maximum size. */
- public ScorerDocQueue(int maxSize) {
- // assert maxSize >= 0;
- size = 0;
- int heapSize = maxSize + 1;
- heap = new HeapedScorerDoc[heapSize];
- this.maxSize = maxSize;
- topHSD = heap[1]; // initially null
- }
-
- /**
- * Adds a Scorer to a ScorerDocQueue in log(size) time.
- * If one tries to add more Scorers than maxSize
- * a RuntimeException (ArrayIndexOutOfBound) is thrown.
- */
- public final void put(Scorer scorer) {
- size++;
- heap[size] = new HeapedScorerDoc(scorer);
- upHeap();
- }
-
- /**
- * Adds a Scorer to the ScorerDocQueue in log(size) time if either
- * the ScorerDocQueue is not full, or not lessThan(scorer, top()).
- * @param scorer
- * @return true if scorer is added, false otherwise.
- */
- public boolean insert(Scorer scorer){
- if (size < maxSize) {
- put(scorer);
- return true;
- } else {
- int docNr = scorer.docID();
- if ((size > 0) && (! (docNr < topHSD.doc))) { // heap[1] is top()
- heap[1] = new HeapedScorerDoc(scorer, docNr);
- downHeap();
- return true;
- } else {
- return false;
- }
- }
- }
-
- /** Returns the least Scorer of the ScorerDocQueue in constant time.
- * Should not be used when the queue is empty.
- */
- public final Scorer top() {
- // assert size > 0;
- return topHSD.scorer;
- }
-
- /** Returns document number of the least Scorer of the ScorerDocQueue
- * in constant time.
- * Should not be used when the queue is empty.
- */
- public final int topDoc() {
- // assert size > 0;
- return topHSD.doc;
- }
-
- public final float topScore() throws IOException {
- // assert size > 0;
- return topHSD.scorer.score();
- }
-
- public final boolean topNextAndAdjustElsePop() throws IOException {
- return checkAdjustElsePop(topHSD.scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
- }
-
- public final boolean topSkipToAndAdjustElsePop(int target) throws IOException {
- return checkAdjustElsePop(topHSD.scorer.advance(target) != DocIdSetIterator.NO_MORE_DOCS);
- }
-
- private boolean checkAdjustElsePop(boolean cond) {
- if (cond) { // see also adjustTop
- topHSD.doc = topHSD.scorer.docID();
- } else { // see also popNoResult
- heap[1] = heap[size]; // move last to first
- heap[size] = null;
- size--;
- }
- downHeap();
- return cond;
- }
-
- /** Removes and returns the least scorer of the ScorerDocQueue in log(size)
- * time.
- * Should not be used when the queue is empty.
- */
- public final Scorer pop() {
- // assert size > 0;
- Scorer result = topHSD.scorer;
- popNoResult();
- return result;
- }
-
- /** Removes the least scorer of the ScorerDocQueue in log(size) time.
- * Should not be used when the queue is empty.
- */
- private final void popNoResult() {
- heap[1] = heap[size]; // move last to first
- heap[size] = null;
- size--;
- downHeap(); // adjust heap
- }
-
- /** Should be called when the scorer at top changes doc() value.
- * Still log(n) worst case, but it's at least twice as fast to
- * { pq.top().change(); pq.adjustTop(); }
- * instead of
- * { o = pq.pop(); o.change(); pq.push(o); }
- *
- */
- public final void adjustTop() {
- // assert size > 0;
- topHSD.adjust();
- downHeap();
- }
-
- /** Returns the number of scorers currently stored in the ScorerDocQueue. */
- public final int size() {
- return size;
- }
-
- /** Removes all entries from the ScorerDocQueue. */
- public final void clear() {
- for (int i = 0; i <= size; i++) {
- heap[i] = null;
- }
- size = 0;
- }
-
- private final void upHeap() {
- int i = size;
- HeapedScorerDoc node = heap[i]; // save bottom node
- int j = i >>> 1;
- while ((j > 0) && (node.doc < heap[j].doc)) {
- heap[i] = heap[j]; // shift parents down
- i = j;
- j = j >>> 1;
- }
- heap[i] = node; // install saved node
- topHSD = heap[1];
- }
-
- private final void downHeap() {
- int i = 1;
- HeapedScorerDoc node = heap[i]; // save top node
- int j = i << 1; // find smaller child
- int k = j + 1;
- if ((k <= size) && (heap[k].doc < heap[j].doc)) {
- j = k;
- }
- while ((j <= size) && (heap[j].doc < node.doc)) {
- heap[i] = heap[j]; // shift up child
- i = j;
- j = i << 1;
- k = j + 1;
- if (k <= size && (heap[k].doc < heap[j].doc)) {
- j = k;
- }
- }
- heap[i] = node; // install saved node
- topHSD = heap[1];
- }
-}