diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/BooleanQuery.java --- a/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Sat Oct 09 08:51:39 2010 -0400 @@ -322,9 +322,10 @@ optional.add(subScorer); } } - + // Check if we can return a BooleanScorer - if (!scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) { + // nocommit + if (false && !scoreDocsInOrder && topScorer && required.size() == 0 && prohibited.size() < 32) { return new BooleanScorer(this, similarity, minNrShouldMatch, optional, prohibited, maxCoord); } diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/BooleanScorer2.java --- a/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Sat Oct 09 08:51:39 2010 -0400 @@ -261,7 +261,7 @@ : new ReqExclScorer(requiredCountingSumScorer, ((prohibitedScorers.size() == 1) ? prohibitedScorers.get(0) - : new DisjunctionSumScorer(prohibitedScorers))); + : new DisjunctionSumScorer(prohibitedScorers, 1))); } /** Scores and collects all matching documents. 
diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java --- a/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java Sat Oct 09 08:51:39 2010 -0400 @@ -120,7 +120,7 @@ protected class ConstantScorer extends Scorer { final DocIdSetIterator docIdSetIterator; final float theScore; - int doc = -1; + private int docID = -1; public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException { super(similarity,w); @@ -140,12 +140,12 @@ @Override public int nextDoc() throws IOException { - return docIdSetIterator.nextDoc(); + return docID = docIdSetIterator.nextDoc(); } @Override public int docID() { - return docIdSetIterator.docID(); + return docID; } @Override @@ -155,7 +155,7 @@ @Override public int advance(int target) throws IOException { - return docIdSetIterator.advance(target); + return docID = docIdSetIterator.advance(target); } } diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java --- a/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Sat Oct 09 08:51:39 2010 -0400 @@ -90,6 +90,7 @@ @Override public float score() throws IOException { int doc = subScorers[0].docID(); + // nocommit -- don't make new array!! 
float[] sum = { subScorers[0].score() }, max = { sum[0] }; int size = numScorers; scoreAll(1, size, doc, sum, max); diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java --- a/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Sat Oct 09 08:51:39 2010 -0400 @@ -1,12 +1,11 @@ package org.apache.lucene.search; /** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -17,220 +16,208 @@ * limitations under the License. */ +import java.io.IOException; import java.util.List; -import java.io.IOException; -import org.apache.lucene.util.ScorerDocQueue; +// TODO: this is almost a copy of DisjunctionMaxScorer, +// except we sum the sub scores instead of taking max/tie +// break. Also, this scorer applies the minNrMatchers +// constraint. /** A Scorer for OR like queries, counterpart of ConjunctionScorer. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. */ + class DisjunctionSumScorer extends Scorer { - /** The number of subscorers. */ - private final int nrScorers; - - /** The subscorers. */ - protected final List subScorers; - - /** The minimum number of scorers that should match. 
*/ + + /* The scorers for subqueries that have remaining docs, kept as a min heap ordered by each scorer's current docID (smallest on top). */ + private final Scorer[] subScorers; private final int minimumNrMatchers; - - /** The scorerDocQueue contains all subscorers ordered by their current doc(), - * with the minimum at the top. - *
<br>The scorerDocQueue is initialized the first time nextDoc() or advance() is called. - *
<br>An exhausted scorer is immediately removed from the scorerDocQueue. - *
<br>If less than the minimumNrMatchers scorers - * remain in the scorerDocQueue nextDoc() and advance() return false. - * <p>
- * After each to call to nextDoc() or advance() - * currentSumScore is the total score of the current matching doc, - * nrMatchers is the number of matching scorers, - * and all scorers are after the matching doc, or are exhausted. - */ - private ScorerDocQueue scorerDocQueue; - - /** The document number of the current match. */ - private int currentDoc = -1; + private int numScorers; + private int doc = -1; + protected int nrMatchers; + private float score; - /** The number of subscorers that provide the current match. */ - protected int nrMatchers = -1; - - private float currentScore = Float.NaN; - - /** Construct a DisjunctionScorer. - * @param subScorers A collection of at least two subscorers. - * @param minimumNrMatchers The positive minimum number of subscorers that should - * match to match this query. - *
<br>When minimumNrMatchers is bigger than - * the number of subScorers, - * no matches will be produced. - *
When minimumNrMatchers equals the number of subScorers, - * it more efficient to use ConjunctionScorer. - */ - public DisjunctionSumScorer( List subScorers, int minimumNrMatchers) throws IOException { + public DisjunctionSumScorer(List subScorersList, int minimumNrMatchers) throws IOException { super(null); - - nrScorers = subScorers.size(); if (minimumNrMatchers <= 0) { throw new IllegalArgumentException("Minimum nr of matchers must be positive"); } - if (nrScorers <= 1) { + if (subScorersList.size() <= 1) { throw new IllegalArgumentException("There must be at least 2 subScorers"); } + if (minimumNrMatchers > subScorersList.size()) { + throw new IllegalArgumentException("Minimum nr of matchers is greater than number of sub scorers"); + } + + if (minimumNrMatchers == 0) { + this.minimumNrMatchers = 1; + } else { + this.minimumNrMatchers = minimumNrMatchers; + } - this.minimumNrMatchers = minimumNrMatchers; - this.subScorers = subScorers; - - initScorerDocQueue(); - } - - /** Construct a DisjunctionScorer, using one as the minimum number - * of matching subscorers. - */ - public DisjunctionSumScorer(List subScorers) throws IOException { - this(subScorers, 1); + numScorers = subScorersList.size(); + subScorers = subScorersList.toArray(new Scorer[numScorers]); + heapify(); + assert subScorers[0].docID() == -1: "sub " + subScorers[0] + " didn't start at docID=-1"; } - /** Called the first time nextDoc() or advance() is called to - * initialize scorerDocQueue. 
+ @Override + public int nextDoc() throws IOException { + while(true) { + while (subScorers[0].docID() == doc) { + if (subScorers[0].nextDoc() != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if (numScorers < minimumNrMatchers) { + return doc = NO_MORE_DOCS; + } + } + } + doc = subScorers[0].docID(); + if (minimumNrMatchers > 1) { + nrMatchers = 1; + countMatches(1); + countMatches(2); + if (nrMatchers >= minimumNrMatchers) { + break; + } + } else { + break; + } + } + + return doc; + } + + private void countMatches(int root) throws IOException { + if (root < numScorers && subScorers[root].docID() == doc) { + nrMatchers++; + countMatches((root<<1)+1); + countMatches((root<<1)+2); + } + } + // only valid after .score() has been invoked + public int nrMatchers() { + return nrMatchers; + } + + @Override + public int docID() { + return doc; + } + + /** Determine the current document score. Initially invalid, until {@link #nextDoc()} is called the first time. + * @return the score of the current generated document */ - private void initScorerDocQueue() throws IOException { - scorerDocQueue = new ScorerDocQueue(nrScorers); - for (Scorer se : subScorers) { - if (se.nextDoc() != NO_MORE_DOCS) { - scorerDocQueue.insert(se); + @Override + public float score() throws IOException { + nrMatchers = 1; + score = subScorers[0].score(); + scoreAll(1); + scoreAll(2); + return score; + } + + // Recursively visit the heap subtree of subScorers positioned on the current doc, summing their scores and counting the matchers + private void scoreAll(int root) throws IOException { + if (root < numScorers && subScorers[root].docID() == doc) { + nrMatchers++; + score += subScorers[root].score(); + scoreAll((root<<1)+1); + scoreAll((root<<1)+2); + } + } + + @Override + public int advance(int target) throws IOException { + if (numScorers == 0) return doc = NO_MORE_DOCS; + while (subScorers[0].docID() < target) { + if (subScorers[0].advance(target) != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if 
(numScorers == 0) { + return doc = NO_MORE_DOCS; + } + } + } + + if (minimumNrMatchers > 1) { + while(true) { + nrMatchers = 1; + countMatches(1); + countMatches(2); + if (nrMatchers >= minimumNrMatchers) { + break; + } + if (nextDoc() == NO_MORE_DOCS) { + return doc = NO_MORE_DOCS; + } + } + } + + return doc = subScorers[0].docID(); + } + + // Organize subScorers into a min heap with scorers generating the earliest document on top. + private void heapify() { + for (int i = (numScorers >> 1) - 1; i >= 0; i--) { + heapAdjust(i); + } + } + + /* The subtree of subScorers at root is a min heap except possibly for its root element. + * Bubble the root down as required to make the subtree a heap. + */ + private void heapAdjust(int root) { + Scorer scorer = subScorers[root]; + int doc = scorer.docID(); + int i = root; + final int limit = (numScorers >> 1) - 1; + while (i <= limit) { + int lchild = (i << 1) + 1; + Scorer lscorer = subScorers[lchild]; + int ldoc = lscorer.docID(); + int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2; + Scorer rscorer = null; + if (rchild < numScorers) { + rscorer = subScorers[rchild]; + rdoc = rscorer.docID(); + } + if (ldoc < doc) { + if (rdoc < ldoc) { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } else { + subScorers[i] = lscorer; + subScorers[lchild] = scorer; + i = lchild; + } + } else if (rdoc < doc) { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } else { + return; } } } - /** Scores and collects all matching documents. - * @param collector The collector to which all matching documents are passed through. 
- */ - @Override - public void score(Collector collector) throws IOException { - collector.setScorer(this); - while (nextDoc() != NO_MORE_DOCS) { - collector.collect(currentDoc); + // Remove the root Scorer from subScorers and re-establish it as a heap + private void heapRemoveRoot() { + if (numScorers == 1) { + subScorers[0] = null; + numScorers = 0; + } else { + subScorers[0] = subScorers[numScorers - 1]; + subScorers[numScorers - 1] = null; + --numScorers; + heapAdjust(0); } } - /** Expert: Collects matching documents in a range. Hook for optimization. - * Note that {@link #nextDoc()} must be called once before this method is called - * for the first time. - * @param collector The collector to which all matching documents are passed through. - * @param max Do not score documents past this. - * @return true if more matching documents may remain. - */ - @Override - protected boolean score(Collector collector, int max, int firstDocID) throws IOException { - // firstDocID is ignored since nextDoc() sets 'currentDoc' - collector.setScorer(this); - while (currentDoc < max) { - collector.collect(currentDoc); - if (nextDoc() == NO_MORE_DOCS) { - return false; - } - } - return true; - } - - @Override - public int nextDoc() throws IOException { - if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) { - currentDoc = NO_MORE_DOCS; - } - return currentDoc; - } - - /** Advance all subscorers after the current document determined by the - * top of the scorerDocQueue. - * Repeat until at least the minimum number of subscorers match on the same - * document and all subscorers are after that document or are exhausted. - *
<br>On entry the scorerDocQueue has at least minimumNrMatchers - * available. At least the scorer with the minimum document number will be advanced. - * @return true iff there is a match. - *
<br>In case there is a match, currentDoc, currentSumScore, - * and nrMatchers describe the match. - * - * TODO: Investigate whether it is possible to use advance() when - * the minimum number of matchers is bigger than one, ie. try and use the - * character of ConjunctionScorer for the minimum number of matchers. - * Also delay calling score() on the sub scorers until the minimum number of - * matchers is reached. - *
For this, a Scorer array with minimumNrMatchers elements might - * hold Scorers at currentDoc that are temporarily popped from scorerQueue. - */ - protected boolean advanceAfterCurrent() throws IOException { - do { // repeat until minimum nr of matchers - currentDoc = scorerDocQueue.topDoc(); - currentScore = scorerDocQueue.topScore(); - nrMatchers = 1; - do { // Until all subscorers are after currentDoc - if (!scorerDocQueue.topNextAndAdjustElsePop()) { - if (scorerDocQueue.size() == 0) { - break; // nothing more to advance, check for last match. - } - } - if (scorerDocQueue.topDoc() != currentDoc) { - break; // All remaining subscorers are after currentDoc. - } - currentScore += scorerDocQueue.topScore(); - nrMatchers++; - } while (true); - - if (nrMatchers >= minimumNrMatchers) { - return true; - } else if (scorerDocQueue.size() < minimumNrMatchers) { - return false; - } - } while (true); - } - - /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #nextDoc()} is called the first time. - */ - @Override - public float score() throws IOException { return currentScore; } - - @Override - public int docID() { - return currentDoc; - } - - /** Returns the number of subscorers matching the current document. - * Initially invalid, until {@link #nextDoc()} is called the first time. - */ - public int nrMatchers() { - return nrMatchers; - } - - /** - * Advances to the first match beyond the current whose document number is - * greater than or equal to a given target.
- * The implementation uses the advance() method on the subscorers. - * - * @param target - * The target document number. - * @return the document whose number is greater than or equal to the given - * target, or -1 if none exist. - */ - @Override - public int advance(int target) throws IOException { - if (scorerDocQueue.size() < minimumNrMatchers) { - return currentDoc = NO_MORE_DOCS; - } - if (target <= currentDoc) { - return currentDoc; - } - do { - if (scorerDocQueue.topDoc() >= target) { - return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS); - } else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) { - if (scorerDocQueue.size() < minimumNrMatchers) { - return currentDoc = NO_MORE_DOCS; - } - } - } while (true); - } } diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/search/MultiTermQuery.java --- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Sat Oct 09 08:51:15 2010 -0400 +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java Sat Oct 09 08:51:39 2010 -0400 @@ -512,7 +512,7 @@ final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit); collectTerms(reader, query, col); - + if (col.hasCutOff) { return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); } else if (col.termCount == 0) { @@ -529,7 +529,9 @@ bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD); } // Strip scores - final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + // nocommit + //final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + final Query result = bq; result.setBoost(query.getBoost()); query.incTotalNumberOfTerms(col.termCount); return result; @@ -549,7 +551,9 @@ public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException { termCount++; - if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) { + // nocommit + //if (termCount >= termCountLimit || (false && 
(docVisitCount >= docCountCutoff))) { + if (termCount >= 1024) { hasCutOff = true; return false; } diff -r b9a32a0862d2 lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java --- a/lucene/src/java/org/apache/lucene/util/ScorerDocQueue.java Sat Oct 09 08:51:15 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,219 +0,0 @@ -package org.apache.lucene.util; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Derived from org.apache.lucene.util.PriorityQueue of March 2005 */ - -import java.io.IOException; - -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Scorer; - -/** A ScorerDocQueue maintains a partial ordering of its Scorers such that the - least Scorer can always be found in constant time. Put()'s and pop()'s - require log(size) time. The ordering is by Scorer.doc(). 
- * - * @lucene.internal - */ -public class ScorerDocQueue { // later: SpansQueue for spans with doc and term positions - private final HeapedScorerDoc[] heap; - private final int maxSize; - private int size; - - private class HeapedScorerDoc { - Scorer scorer; - int doc; - - HeapedScorerDoc(Scorer s) { this(s, s.docID()); } - - HeapedScorerDoc(Scorer scorer, int doc) { - this.scorer = scorer; - this.doc = doc; - } - - void adjust() { doc = scorer.docID(); } - } - - private HeapedScorerDoc topHSD; // same as heap[1], only for speed - - /** Create a ScorerDocQueue with a maximum size. */ - public ScorerDocQueue(int maxSize) { - // assert maxSize >= 0; - size = 0; - int heapSize = maxSize + 1; - heap = new HeapedScorerDoc[heapSize]; - this.maxSize = maxSize; - topHSD = heap[1]; // initially null - } - - /** - * Adds a Scorer to a ScorerDocQueue in log(size) time. - * If one tries to add more Scorers than maxSize - * a RuntimeException (ArrayIndexOutOfBound) is thrown. - */ - public final void put(Scorer scorer) { - size++; - heap[size] = new HeapedScorerDoc(scorer); - upHeap(); - } - - /** - * Adds a Scorer to the ScorerDocQueue in log(size) time if either - * the ScorerDocQueue is not full, or not lessThan(scorer, top()). - * @param scorer - * @return true if scorer is added, false otherwise. - */ - public boolean insert(Scorer scorer){ - if (size < maxSize) { - put(scorer); - return true; - } else { - int docNr = scorer.docID(); - if ((size > 0) && (! (docNr < topHSD.doc))) { // heap[1] is top() - heap[1] = new HeapedScorerDoc(scorer, docNr); - downHeap(); - return true; - } else { - return false; - } - } - } - - /** Returns the least Scorer of the ScorerDocQueue in constant time. - * Should not be used when the queue is empty. - */ - public final Scorer top() { - // assert size > 0; - return topHSD.scorer; - } - - /** Returns document number of the least Scorer of the ScorerDocQueue - * in constant time. - * Should not be used when the queue is empty. 
- */ - public final int topDoc() { - // assert size > 0; - return topHSD.doc; - } - - public final float topScore() throws IOException { - // assert size > 0; - return topHSD.scorer.score(); - } - - public final boolean topNextAndAdjustElsePop() throws IOException { - return checkAdjustElsePop(topHSD.scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - } - - public final boolean topSkipToAndAdjustElsePop(int target) throws IOException { - return checkAdjustElsePop(topHSD.scorer.advance(target) != DocIdSetIterator.NO_MORE_DOCS); - } - - private boolean checkAdjustElsePop(boolean cond) { - if (cond) { // see also adjustTop - topHSD.doc = topHSD.scorer.docID(); - } else { // see also popNoResult - heap[1] = heap[size]; // move last to first - heap[size] = null; - size--; - } - downHeap(); - return cond; - } - - /** Removes and returns the least scorer of the ScorerDocQueue in log(size) - * time. - * Should not be used when the queue is empty. - */ - public final Scorer pop() { - // assert size > 0; - Scorer result = topHSD.scorer; - popNoResult(); - return result; - } - - /** Removes the least scorer of the ScorerDocQueue in log(size) time. - * Should not be used when the queue is empty. - */ - private final void popNoResult() { - heap[1] = heap[size]; // move last to first - heap[size] = null; - size--; - downHeap(); // adjust heap - } - - /** Should be called when the scorer at top changes doc() value. - * Still log(n) worst case, but it's at least twice as fast to

<pre>
-   *  { pq.top().change(); pq.adjustTop(); }
-   * </pre> instead of <pre>
-   *  { o = pq.pop(); o.change(); pq.push(o); }
-   * </pre>
- */ - public final void adjustTop() { - // assert size > 0; - topHSD.adjust(); - downHeap(); - } - - /** Returns the number of scorers currently stored in the ScorerDocQueue. */ - public final int size() { - return size; - } - - /** Removes all entries from the ScorerDocQueue. */ - public final void clear() { - for (int i = 0; i <= size; i++) { - heap[i] = null; - } - size = 0; - } - - private final void upHeap() { - int i = size; - HeapedScorerDoc node = heap[i]; // save bottom node - int j = i >>> 1; - while ((j > 0) && (node.doc < heap[j].doc)) { - heap[i] = heap[j]; // shift parents down - i = j; - j = j >>> 1; - } - heap[i] = node; // install saved node - topHSD = heap[1]; - } - - private final void downHeap() { - int i = 1; - HeapedScorerDoc node = heap[i]; // save top node - int j = i << 1; // find smaller child - int k = j + 1; - if ((k <= size) && (heap[k].doc < heap[j].doc)) { - j = k; - } - while ((j <= size) && (heap[j].doc < node.doc)) { - heap[i] = heap[j]; // shift up child - i = j; - j = i << 1; - k = j + 1; - if (k <= size && (heap[k].doc < heap[j].doc)) { - j = k; - } - } - heap[i] = node; // install saved node - topHSD = heap[1]; - } -}