Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 759913) +++ CHANGES.txt (working copy) @@ -3,6 +3,19 @@ ======================= Trunk (not yet released) ======================= +Changes in backwards compatibility policy + + 1. LUCENE-1575: Searchable.search(Weight, Filter, int, Sort) currently, tracks + document scores and sets them in the returned FieldDoc, however in 3.0 it + will move to not track document scores. If document scores tracking is still + needed, you can use Searchable.search(Weight, Filter, Collector) and pass in + a TopFieldCollector instance, using the following code sample: + + TopFieldCollector tfc = TopFieldCollector.create(sort, numHits, fillFields, + true /* trackDocScores */); + searcher.search(weight, filter, tfc); + + Changes in runtime behavior 1. LUCENE-1424: QueryParser now by default uses constant score query @@ -10,6 +23,13 @@ already does so for RangeQuery, as well). Call setConstantScoreRewrite(false) to revert to BooleanQuery rewriting method. (Mark Miller via Mike McCandless) + + 2. LUCENE-1575: TopScoreDocCollector don't filter out zero scoring documents + anymore. You can wrap them (as well as any other Colletor) with + PositiveScoresOnlyCollector to achieve the same effect. + In addition, note that the IndexSearcher methods which take Sort as + parameter will be changed in 3.0 to not track document scores. (Shai Erera + via Mike McCandless) API Changes @@ -69,6 +89,11 @@ 12. LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods to denote issues when offsets in TokenStream tokens exceed the length of the provided text. (Mark Harwood) + +13. LUCENE-1575: HitCollector is now deprecated in favor of a new Collector + abstract class. For easy migration, people can use HitCollectorWrapper which + accepts a HitCollector. Note that this class is also deprecated and will be + removed when HitCollector is removed. (Shai Erera via Mike McCandless) Bug fixes @@ -237,6 +262,11 @@ those segments that did not change, and also speeds up searches that sort by relevance or by field values. (Mark Miller, Mike McCandless) + + 7. LUCENE-1575: The new Collector class decouples collect() from score + computation. Instead it offers a setScorer method which implementors can use + to save the Scorer and use it in collect() when the document's score is + required. (Shai Erera via Mike McCandless) Documentation Index: src/java/org/apache/lucene/search/BooleanScorer.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer.java (revision 759913) +++ src/java/org/apache/lucene/search/BooleanScorer.java (working copy) @@ -80,11 +80,11 @@ public boolean done; public boolean required = false; public boolean prohibited = false; - public MultiReaderHitCollector collector; + public Collector collector; public SubScorer next; public SubScorer(Scorer scorer, boolean required, boolean prohibited, - MultiReaderHitCollector collector, SubScorer next) + Collector collector, SubScorer next) throws IOException { this.scorer = scorer; this.done = !scorer.next(); @@ -128,18 +128,32 @@ private int end; private Bucket current; + /** @deprecated use {@link #score(Collector)} instead. 
*/ public void score(HitCollector hc) throws IOException { next(); score(hc, Integer.MAX_VALUE); } + + public void score(Collector collector) throws IOException { + next(); + score(collector, Integer.MAX_VALUE); + } + /** @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + protected boolean score(Collector collector, int max) throws IOException { if (coordFactors == null) computeCoordFactors(); boolean more; Bucket tmp; + BucketScorer bs = new BucketScorer(); + // The internal loop will set the score and doc before calling collect. + collector.setScorer(bs); do { bucketTable.first = null; @@ -158,7 +172,9 @@ } if (current.coord >= minNrShouldMatch) { - hc.collect(current.doc, current.score * coordFactors[current.coord]); + bs.score = current.score * coordFactors[current.coord]; + bs.doc = current.doc; + collector.collect(current.doc); } } @@ -210,8 +226,9 @@ end += BucketTable.SIZE; for (SubScorer sub = scorers; sub != null; sub = sub.next) { Scorer scorer = sub.scorer; + sub.collector.setScorer(scorer); while (!sub.done && scorer.doc() < end) { - sub.collector.collect(scorer.doc(), scorer.score()); + sub.collector.collect(scorer.doc()); sub.done = !scorer.next(); } if (!sub.done) { @@ -237,6 +254,42 @@ Bucket next; // next valid bucket } + // An internal class which is used in score(Collector, int) for setting the + // current score. This is required since Collector exposes a setScorer method + // and implementations that need the score will call scorer.score(). + // Therefore the only methods that are implemented are score() and doc(). + private static final class BucketScorer extends Scorer { + + float score; + int doc; + + public BucketScorer() { + super(null); + } + + + public Explanation explain(int doc) throws IOException { + return null; + } + + public float score() throws IOException { + return score; + } + + public int doc() { + return doc; + } + + public boolean next() throws IOException { + return false; + } + + public boolean skipTo(int target) throws IOException { + return false; + } + + } + /** A simple hash table of document scores within a range. 
*/ static final class BucketTable { public static final int SIZE = 1 << 11; @@ -249,19 +302,25 @@ public final int size() { return SIZE; } - public MultiReaderHitCollector newCollector(int mask) { - return new Collector(mask, this); + public Collector newCollector(int mask) { + return new BolleanScorerCollector(mask, this); } } - static final class Collector extends MultiReaderHitCollector { + private static final class BolleanScorerCollector extends Collector { private BucketTable bucketTable; private int mask; - public Collector(int mask, BucketTable bucketTable) { + private Scorer scorer; + + public BolleanScorerCollector(int mask, BucketTable bucketTable) { this.mask = mask; this.bucketTable = bucketTable; } - public final void collect(final int doc, final float score) { + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public final void collect(final int doc) throws IOException { final BucketTable table = bucketTable; final int i = doc & BucketTable.MASK; Bucket bucket = table.buckets[i]; @@ -270,14 +329,14 @@ if (bucket.doc != doc) { // invalid bucket bucket.doc = doc; // set doc - bucket.score = score; // initialize score + bucket.score = scorer.score(); // initialize score bucket.bits = mask; // initialize mask bucket.coord = 1; // initialize coord bucket.next = table.first; // push onto valid list table.first = bucket; } else { // valid bucket - bucket.score += score; // increment score + bucket.score += scorer.score(); // increment score bucket.bits |= mask; // add bits in mask bucket.coord++; // increment coord } Index: src/java/org/apache/lucene/search/BooleanScorer2.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer2.java (revision 759913) +++ src/java/org/apache/lucene/search/BooleanScorer2.java (working copy) @@ -300,8 +300,17 @@ * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { if (allowDocsOutOfOrder && requiredScorers.size() == 0 && prohibitedScorers.size() < 32) { // fall back to BooleanScorer, scores documents somewhat out of order @@ -314,13 +323,14 @@ while (si.hasNext()) { bs.add((Scorer) si.next(), false /* required */, true /* prohibited */); } - bs.score(hc); + bs.score(collector); } else { if (countingSumScorer == null) { initCountingSumScorer(); } + collector.setScorer(this); while (countingSumScorer.next()) { - hc.collect(countingSumScorer.doc(), score()); + collector.collect(countingSumScorer.doc()); } } } @@ -332,12 +342,25 @@ * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. + *
Note that {@link #next()} must be called once before this method is + * called for the first time. + * @param collector The collector to which all matching documents are passed through. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. + */ + protected boolean score(Collector collector, int max) throws IOException { // null pointer exception when next() was not called before: int docNr = countingSumScorer.doc(); + collector.setScorer(this); while (docNr < max) { - hc.collect(docNr, score()); + collector.collect(docNr); if (! countingSumScorer.next()) { return false; } Index: src/java/org/apache/lucene/search/Collector.java =================================================================== --- src/java/org/apache/lucene/search/Collector.java (revision 759639) +++ src/java/org/apache/lucene/search/Collector.java (working copy) @@ -22,32 +22,82 @@ import org.apache.lucene.index.IndexReader; /** - * Expert: A HitCollector that can be used to collect hits - * across sequential IndexReaders. For a Multi*Reader, this - * collector advances through each of the sub readers, in an - * arbitrary order. This results in a higher performance - * means of collection. - * - * NOTE: The doc that is passed to the collect method - * is relative to the current reader. You must re-base the - * doc, by recording the docBase from the last setNextReader - * call, to map it to the docID space of the - * Multi*Reader. - * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. + * Expert: Collectors are primarily meant to be used to implement queries, + * sorting and filtering. This class replaces the deprecated HitCollector and + * offers API for efficient collection of hits across sequential + * {@link IndexReader}s. The collector advances through each of the sub readers, + * in an arbitrary order. This results in a higher performance means of + * collection.
+ * It also decouples the score from the collected doc. Collectors that do not + * need the score during collection avoid computing a document's score + * unnecessarily. Collectors that do need it can use the setScorer(Scorer) + * method to compute the current document's score.
+ * + * NOTE: The doc that is passed to the collect method is relative to the + * current reader. You must re-base the doc by recording the docBase from the + * last setNextReader call in order to map it to the docID space of the Multi*Reader. + * + *

+ * If, for example, an application wished to collect all of the hits for a query
+ * in a BitSet, then it might:
+ *
+ * <pre>
+ * Searcher searcher = new IndexSearcher(indexReader);
+ * final BitSet bits = new BitSet(indexReader.maxDoc());
+ * searcher.search(query, new Collector() {
+ *   private int docBase = 0;
+ *
+ *   // ignore scorer
+ *   public void setScorer(Scorer scorer) {
+ *   }
+ *
+ *   public void collect(int doc) {
+ *     bits.set(doc + docBase);
+ *   }
+ *
+ *   public void setNextReader(IndexReader reader, int docBase) {
+ *     this.docBase = docBase;
+ *   }
+ * });
+ * </pre>
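+ *
+ * A collector that does need scores can save the Scorer handed to
+ * setScorer(Scorer) and call its score() method inside collect(). As a rough
+ * sketch only (the 0.5f threshold below is arbitrary and purely illustrative),
+ * such a collector might look like:
+ *
+ * <pre>
+ * final BitSet highScoring = new BitSet(indexReader.maxDoc());
+ * searcher.search(query, new Collector() {
+ *   private int docBase = 0;
+ *   private Scorer scorer;
+ *
+ *   // keep the scorer; its score() is read later in collect()
+ *   public void setScorer(Scorer scorer) {
+ *     this.scorer = scorer;
+ *   }
+ *
+ *   public void collect(int doc) throws IOException {
+ *     if (scorer.score() > 0.5f) {
+ *       highScoring.set(doc + docBase); // re-base to the Multi*Reader docID space
+ *     }
+ *   }
+ *
+ *   public void setNextReader(IndexReader reader, int docBase) {
+ *     this.docBase = docBase;
+ *   }
+ * });
+ * </pre>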
+ * + * NOTE: This API is experimental and might change in incompatible ways + * in the next release. */ -public abstract class MultiReaderHitCollector extends HitCollector { +public abstract class Collector { + /** - * Called before collecting from each IndexReader. All doc - * ids in {@link #collect(int, float)} will correspond to reader. + * Called before successive calls to {@link #collect(int)}. Implementations + * that need the score of the current document (passed-in to + * {@link #collect(int)}), should save the passed-in Scorer and call + * scorer.score() when needed. + */ + public abstract void setScorer(Scorer scorer) throws IOException; + + /** + * Called once for every document matching a query, with the unbased document + * number. * - * Add docBase to the current IndexReaders internal document id to - * re-base ids in {@link #collect(int, float)}. + *

+ * Note: This is called in an inner search loop. For good search performance, + * implementations of this method should not call {@link Searcher#doc(int)} or + * {@link org.apache.lucene.index.IndexReader#document(int)} on every document + * number encountered. Doing so can slow searches by an order of magnitude or + * more. + */ + public abstract void collect(int doc) throws IOException; + + /** + * Called before collecting from each IndexReader. All doc ids in + * {@link #collect(int)} will correspond to reader. * - * @param reader next IndexReader + * Add docBase to the current IndexReaders internal document id to re-base ids + * in {@link #collect(int)}. + * + * @param reader + * next IndexReader * @param docBase - * @throws IOException */ public abstract void setNextReader(IndexReader reader, int docBase) throws IOException; + } Index: src/java/org/apache/lucene/search/DisjunctionSumScorer.java =================================================================== --- src/java/org/apache/lucene/search/DisjunctionSumScorer.java (revision 759913) +++ src/java/org/apache/lucene/search/DisjunctionSumScorer.java (working copy) @@ -112,10 +112,20 @@ * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { + collector.setScorer(this); while (next()) { - hc.collect(currentDoc, currentScore); + collector.collect(currentDoc); } } @@ -126,10 +136,23 @@ * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. Hook for optimization. + * Note that {@link #next()} must be called once before this method is called + * for the first time. + * @param collector The collector to which all matching documents are passed through. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. + */ + protected boolean score(Collector collector, int max) throws IOException { + collector.setScorer(this); while (currentDoc < max) { - hc.collect(currentDoc, currentScore); + collector.collect(currentDoc); if (!next()) { return false; } Index: src/java/org/apache/lucene/search/FieldComparator.java =================================================================== --- src/java/org/apache/lucene/search/FieldComparator.java (revision 759913) +++ src/java/org/apache/lucene/search/FieldComparator.java (working copy) @@ -62,11 +62,11 @@ return values[slot1] - values[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottom - currentReaderValues[doc]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -87,7 +87,7 @@ public Comparable value(int slot) { return new Byte(values[slot]); } - }; + } /** Sorts by ascending docID */ public static final class DocComparator extends FieldComparator { @@ -104,12 +104,12 @@ return docIDs[slot1] - docIDs[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // No overflow risk because docIDs are non-negative return bottom - (docBase + doc); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { docIDs[slot] = docBase + doc; } @@ -131,7 +131,7 @@ public Comparable value(int slot) { return new Integer(docIDs[slot]); } - }; + } /** Parses field's values as double (using {@link * ExtendedFieldCache#getDoubles} and sorts by ascending value */ @@ -160,7 +160,7 @@ } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final double v2 = currentReaderValues[doc]; if (bottom > v2) { return 1; @@ -171,7 +171,7 @@ } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -192,7 +192,7 @@ public Comparable value(int slot) { return new Double(values[slot]); } - }; + } /** Parses field's values as float (using {@link * FieldCache#getFloats} and sorts by ascending value */ @@ -223,7 +223,7 @@ } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // TODO: are there sneaky non-branch ways to compute // sign of float? 
final float v2 = currentReaderValues[doc]; @@ -236,7 +236,7 @@ } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -256,7 +256,7 @@ public Comparable value(int slot) { return new Float(values[slot]); } - }; + } /** Parses field's values as int (using {@link * FieldCache#getInts} and sorts by ascending value */ @@ -289,7 +289,7 @@ } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // TODO: there are sneaky non-branch ways to compute // -1/+1/0 sign // Cannot return bottom - values[slot2] because that @@ -304,7 +304,7 @@ } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -324,7 +324,7 @@ public Comparable value(int slot) { return new Integer(values[slot]); } - }; + } /** Parses field's values as long (using {@link * ExtendedFieldCache#getLongs} and sorts by ascending value */ @@ -355,7 +355,7 @@ } } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { // TODO: there are sneaky non-branch ways to compute // -1/+1/0 sign final long v2 = currentReaderValues[doc]; @@ -368,7 +368,7 @@ } } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -389,7 +389,7 @@ public Comparable value(int slot) { return new Long(values[slot]); } - }; + } /** Sorts by descending relevance. NOTE: if you are * sorting only by descending relevance and then @@ -400,7 +400,8 @@ public static final class RelevanceComparator extends FieldComparator { private final float[] scores; private float bottom; - + private Scorer scorer; + RelevanceComparator(int numHits) { scores = new float[numHits]; } @@ -408,27 +409,16 @@ public int compare(int slot1, int slot2) { final float score1 = scores[slot1]; final float score2 = scores[slot2]; - if (score1 > score2) { - return -1; - } else if (score1 < score2) { - return 1; - } else { - return 0; - } + return score1 > score2 ? -1 : (score1 < score2 ? 1 : 0); } - public int compareBottom(int doc, float score) { - if (bottom > score) { - return -1; - } else if (bottom < score) { - return 1; - } else { - return 0; - } + public int compareBottom(int doc) throws IOException { + float score = scorer.score(); + return bottom > score ? -1 : (bottom < score ? 1 : 0); } - public void copy(int slot, int doc, float score) { - scores[slot] = score; + public void copy(int slot, int doc) throws IOException { + scores[slot] = scorer.score(); } public void setNextReader(IndexReader reader, int docBase, int numSlotsFull) { @@ -438,6 +428,12 @@ this.bottom = scores[bottom]; } + public void setScorer(Scorer scorer) { + // wrap with a ScoreCachingWrappingScorer so that successive calls to + // score() will not incur score computation over and over again. 
+ this.scorer = new ScoreCachingWrappingScorer(scorer); + } + public int sortType() { return SortField.SCORE; } @@ -445,7 +441,7 @@ public Comparable value(int slot) { return new Float(scores[slot]); } - }; + } /** Parses field's values as short (using {@link * FieldCache#getShorts} and sorts by ascending value */ @@ -466,11 +462,11 @@ return values[slot1] - values[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottom - currentReaderValues[doc]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -491,7 +487,7 @@ public Comparable value(int slot) { return new Short(values[slot]); } - }; + } /** Sorts by a field's value using the Collator for a * given Locale.*/ @@ -523,7 +519,7 @@ return collator.compare(val1, val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final String val2 = currentReaderValues[doc]; if (bottom == null) { if (val2 == null) { @@ -536,7 +532,7 @@ return collator.compare(bottom, val2); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -556,7 +552,7 @@ public Comparable value(int slot) { return values[slot]; } - }; + } // NOTE: there were a number of other interesting String // comparators explored, but this one seemed to perform @@ -608,7 +604,7 @@ return val1.compareTo(val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { assert bottomSlot != -1; int order = this.order[doc]; final int cmp = bottomOrd - order; @@ -659,7 +655,7 @@ ords[slot] = index; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { final int ord = order[doc]; ords[slot] = ord; assert ord >= 0; @@ -709,7 +705,7 @@ public String getField() { return field; } - }; + } /** Sorts by field's natural String sort order. All * comparisons are done using String.compareTo, which is @@ -742,7 +738,7 @@ return val1.compareTo(val2); } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { final String val2 = currentReaderValues[doc]; if (bottom == null) { if (val2 == null) { @@ -755,7 +751,7 @@ return bottom.compareTo(val2); } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { values[slot] = currentReaderValues[doc]; } @@ -775,11 +771,11 @@ public Comparable value(int slot) { return values[slot]; } - }; + } final protected static int binarySearch(String[] a, String key) { return binarySearch(a, key, 0, a.length-1); - }; + } final protected static int binarySearch(String[] a, String key, int low, int high) { @@ -801,7 +797,7 @@ return mid; } return -(low + 1); - }; + } /** * Compare hit at slot1 with hit at slot2. Return @@ -827,22 +823,20 @@ * only invoked after setBottom has been called. * * @param doc that was hit - * @param score of the hit * @return any N < 0 if the doc's value is sorted after * the bottom entry (not competitive), any N > 0 if the * doc's value is sorted before the bottom entry and 0 if * they are equal. */ - public abstract int compareBottom(int doc, float score); + public abstract int compareBottom(int doc) throws IOException; /** * Copy hit (doc,score) to hit slot. 
* * @param slot which slot to copy the hit to * @param doc docID relative to current reader - * @param score hit score */ - public abstract void copy(int slot, int doc, float score); + public abstract void copy(int slot, int doc) throws IOException; /** * Set a new Reader. All doc correspond to the current Reader. @@ -854,6 +848,12 @@ */ public abstract void setNextReader(IndexReader reader, int docBase, int numSlotsFull) throws IOException; + /** Sets the Scorer to use in case a document's score is needed. */ + public void setScorer(Scorer scorer) { + // Empty implementation since most comparators don't need the score. This + // can be overridden by those that need it. + } + /** * @return SortField.TYPE */ Index: src/java/org/apache/lucene/search/FieldComparatorSource.java =================================================================== --- src/java/org/apache/lucene/search/FieldComparatorSource.java (revision 759913) +++ src/java/org/apache/lucene/search/FieldComparatorSource.java (working copy) @@ -18,7 +18,6 @@ */ import java.io.IOException; -import org.apache.lucene.index.IndexReader; /** * Provides a {@link FieldComparator} for custom field sorting. @@ -38,6 +37,6 @@ * @throws IOException * If an error occurs reading the index. */ - public abstract FieldComparator newComparator(String fieldname, IndexReader[] subReaders, int numHits, int sortPos, boolean reversed) + public abstract FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException; } Index: src/java/org/apache/lucene/search/FieldValueHitQueue.java =================================================================== --- src/java/org/apache/lucene/search/FieldValueHitQueue.java (revision 759913) +++ src/java/org/apache/lucene/search/FieldValueHitQueue.java (working copy) @@ -17,13 +17,13 @@ * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.util.PriorityQueue; -import java.io.IOException;; - /** * Expert: A hit queue for sorting by hits by terms in more than one field. * Uses FieldCache.DEFAULT for maintaining @@ -65,7 +65,7 @@ * in order that they will be searched * @throws IOException */ - public FieldValueHitQueue(SortField[] fields, int size, IndexReader[] subReaders) throws IOException { + public FieldValueHitQueue(SortField[] fields, int size) throws IOException { numComparators = fields.length; comparators = new FieldComparator[numComparators]; reverseMul = new int[numComparators]; @@ -82,7 +82,7 @@ assert field.getType() != SortField.AUTO; reverseMul[i] = field.reverse ? -1 : 1; - comparators[i] = field.getComparator(subReaders, size, i, field.reverse); + comparators[i] = field.getComparator(size, i, field.reverse); } if (numComparators == 1) { @@ -180,7 +180,7 @@ * Attempts to detect the given field type for an IndexReader. */ static int detectFieldType(IndexReader reader, String fieldKey) throws IOException { - String field = ((String)fieldKey).intern(); + String field = fieldKey.intern(); TermEnum enumerator = reader.terms (new Term (field)); try { Term term = enumerator.term(); Index: src/java/org/apache/lucene/search/HitCollector.java =================================================================== --- src/java/org/apache/lucene/search/HitCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/HitCollector.java (working copy) @@ -17,15 +17,15 @@ * limitations under the License. 
*/ -/** Lower-level search API. - *
HitCollectors are primarily meant to be used to implement queries, - * sorting and filtering. See {@link - * MultiReaderHitCollector} for a lower level and - * higher performance (on a multi-segment index) API. - * @see Searcher#search(Query,HitCollector) - * @version $Id$ +/** + * Lower-level search API.
+ * HitCollectors are primarily meant to be used to implement queries, sorting + * and filtering. + * + * @deprecated use {@link Collector} instead. */ public abstract class HitCollector { + /** Called once for every document matching a query, with the document * number and its raw score. * @@ -51,4 +51,5 @@ * between 0 and 1. */ public abstract void collect(int doc, float score); + } Index: src/java/org/apache/lucene/search/HitCollectorWrapper.java =================================================================== --- src/java/org/apache/lucene/search/HitCollectorWrapper.java (revision 0) +++ src/java/org/apache/lucene/search/HitCollectorWrapper.java (revision 0) @@ -0,0 +1,49 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; + +/** + * Wrapper for non expert ({@link HitCollector}) implementations, which simply + * re-bases the incoming docID before calling {@link HitCollector#collect}. + * @deprecated this class should be removed when {@link HitCollector} is removed. + */ +class HitCollectorWrapper extends Collector { + private HitCollector collector; + private int base = -1; + private Scorer scorer = null; + + public HitCollectorWrapper(HitCollector collector) { + this.collector = collector; + } + + public void setNextReader(IndexReader reader, int docBase) { + base = docBase; + } + + public void collect(int doc) throws IOException { + collector.collect(doc + base, scorer.score()); + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } +} Index: src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- src/java/org/apache/lucene/search/IndexSearcher.java (revision 759913) +++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -188,12 +188,16 @@ throws IOException { return search(weight, filter, nDocs, sort, true); } - - /** - * Just like {@link #search(Weight, Filter, int, Sort)}, - * but you choose whether or not the fields in the - * returned {@link FieldDoc} instances should be set by - * specifying fillFields. + + /** + * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose + * whether or not the fields in the returned {@link FieldDoc} instances should + * be set by specifying fillFields.
+ * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. */ public TopFieldDocs search(Weight weight, Filter filter, final int nDocs, Sort sort, boolean fillFields) @@ -222,29 +226,32 @@ if (legacy) { // Search the single top-level reader - TopScoreDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs); - collector.setNextReader(reader, 0); - doSearch(reader, weight, filter, collector); + TopDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs); + HitCollectorWrapper hcw = new HitCollectorWrapper(collector); + hcw.setNextReader(reader, 0); + doSearch(reader, weight, filter, hcw); return (TopFieldDocs) collector.topDocs(); - } else { - // Search each sub-reader - TopFieldCollector collector = new TopFieldCollector(sort, nDocs, sortedSubReaders, fillFields); - search(weight, filter, collector); - return (TopFieldDocs) collector.topDocs(); } + // Search each sub-reader + // TODO: by default we should create a TopFieldCollector which does not + // track document scores. Currently the default is set to true, however it + // will change in 3.0. + TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, fillFields, true); + search(weight, filter, collector); + return (TopFieldDocs) collector.topDocs(); } // inherit javadoc + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ public void search(Weight weight, Filter filter, HitCollector results) throws IOException { - - final MultiReaderHitCollector collector; - if (results instanceof MultiReaderHitCollector) { - collector = (MultiReaderHitCollector) results; - } else { - collector = new MultiReaderCollectorWrapper(results); - } - + search(weight, filter, new HitCollectorWrapper(results)); + } + + // inherit javadoc + public void search(Weight weight, Filter filter, Collector collector) + throws IOException { + for (int i = 0; i < sortedSubReaders.length; i++) { // search each subreader collector.setNextReader(sortedSubReaders[i], sortedStarts[i]); doSearch(sortedSubReaders[i], weight, filter, collector); @@ -252,14 +259,14 @@ } private void doSearch(IndexReader reader, Weight weight, Filter filter, - final HitCollector results) throws IOException { + final Collector collector) throws IOException { Scorer scorer = weight.scorer(reader); if (scorer == null) return; if (filter == null) { - scorer.score(results); + scorer.score(collector); return; } @@ -267,6 +274,7 @@ boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc()); + collector.setScorer(scorer); while (more) { int filterDocId = filterDocIdIterator.doc(); if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) { @@ -274,7 +282,7 @@ } else { int scorerDocId = scorer.doc(); if (scorerDocId == filterDocId) { // permitted by filter - results.collect(scorerDocId, scorer.score()); + collector.collect(scorerDocId); more = filterDocIdIterator.next(); } else { more = filterDocIdIterator.skipTo(scorerDocId); @@ -295,26 +303,4 @@ public Explanation explain(Weight weight, int doc) throws IOException { return weight.explain(reader, doc); } - - /** - * Wrapper for non expert ({@link HitCollector}) - * implementations, which simply re-bases the incoming - * docID before calling {@link HitCollector#collect}. 
- */ - static class MultiReaderCollectorWrapper extends MultiReaderHitCollector { - private HitCollector collector; - private int base = -1; - - public MultiReaderCollectorWrapper(HitCollector collector) { - this.collector = collector; - } - - public void collect(int doc, float score) { - collector.collect(doc + base, score); - } - - public void setNextReader(IndexReader reader, int docBase) { - base = docBase; - } - } } Index: src/java/org/apache/lucene/search/MultiReaderHitCollector.java =================================================================== --- src/java/org/apache/lucene/search/MultiReaderHitCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/MultiReaderHitCollector.java (working copy) @@ -1,53 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.index.IndexReader; - -/** - * Expert: A HitCollector that can be used to collect hits - * across sequential IndexReaders. For a Multi*Reader, this - * collector advances through each of the sub readers, in an - * arbitrary order. This results in a higher performance - * means of collection. - * - * NOTE: The doc that is passed to the collect method - * is relative to the current reader. You must re-base the - * doc, by recording the docBase from the last setNextReader - * call, to map it to the docID space of the - * Multi*Reader. - * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. - */ -public abstract class MultiReaderHitCollector extends HitCollector { - /** - * Called before collecting from each IndexReader. All doc - * ids in {@link #collect(int, float)} will correspond to reader. - * - * Add docBase to the current IndexReaders internal document id to - * re-base ids in {@link #collect(int, float)}. - * - * @param reader next IndexReader - * @param docBase - * @throws IOException - */ - public abstract void setNextReader(IndexReader reader, int docBase) throws IOException; -} Index: src/java/org/apache/lucene/search/MultiSearcher.java =================================================================== --- src/java/org/apache/lucene/search/MultiSearcher.java (revision 759913) +++ src/java/org/apache/lucene/search/MultiSearcher.java (working copy) @@ -97,9 +97,14 @@ throw new UnsupportedOperationException(); } + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. 
*/ public void search(Weight weight, Filter filter, HitCollector results) { throw new UnsupportedOperationException(); } + + public void search(Weight weight, Filter filter, Collector collector) { + throw new UnsupportedOperationException(); + } public TopDocs search(Weight weight,Filter filter,int n) { throw new UnsupportedOperationException(); @@ -251,41 +256,32 @@ return new TopFieldDocs (totalHits, scoreDocs, hq.getFields(), maxScore); } - // inherit javadoc + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ public void search(Weight weight, Filter filter, final HitCollector results) throws IOException { + search(weight, filter, new HitCollectorWrapper(results)); + } + + // inherit javadoc + public void search(Weight weight, Filter filter, final Collector collector) + throws IOException { for (int i = 0; i < searchables.length; i++) { - + final int start = starts[i]; - - final MultiReaderHitCollector hc; - if (results instanceof MultiReaderHitCollector) { - // results can shift - final MultiReaderHitCollector resultsMulti = (MultiReaderHitCollector) results; - hc = new MultiReaderHitCollector() { - public void collect(int doc, float score) { - resultsMulti.collect(doc, score); - } - - public void setNextReader(IndexReader reader, int docBase) throws IOException { - resultsMulti.setNextReader(reader, start+docBase); - } - }; - } else { - // We must shift the docIDs - hc = new MultiReaderHitCollector() { - private int docBase; - public void collect(int doc, float score) { - results.collect(doc + docBase + start, score); - } - - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; - } - }; - } + final Collector hc = new Collector() { + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + public void collect(int doc) throws IOException { + collector.collect(doc); + } + public void setNextReader(IndexReader reader, int docBase) throws IOException { + collector.setNextReader(reader, start + docBase); + } + }; + searchables[i].search(weight, filter, hc); } } Index: src/java/org/apache/lucene/search/ParallelMultiSearcher.java =================================================================== --- src/java/org/apache/lucene/search/ParallelMultiSearcher.java (revision 759913) +++ src/java/org/apache/lucene/search/ParallelMultiSearcher.java (working copy) @@ -170,44 +170,51 @@ * @param results to receive hits * * @todo parallelize this one too + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ public void search(Weight weight, Filter filter, final HitCollector results) throws IOException { - for (int i = 0; i < searchables.length; i++) { + search(weight, filter, new HitCollectorWrapper(results)); + } - final int start = starts[i]; + /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching document. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query)}) is usually more efficient, as it skips + * non-high-scoring hits. + * + * @param weight to match documents + * @param filter if non-null, a bitset used to eliminate some documents + * @param results to receive hits + * + * @todo parallelize this one too + */ + public void search(Weight weight, Filter filter, final Collector collector) + throws IOException { + for (int i = 0; i < searchables.length; i++) { - final MultiReaderHitCollector hc; - if (results instanceof MultiReaderHitCollector) { - // results can shift - final MultiReaderHitCollector resultsMulti = (MultiReaderHitCollector) results; - hc = new MultiReaderHitCollector() { - public void collect(int doc, float score) { - resultsMulti.collect(doc, score); - } + final int start = starts[i]; - public void setNextReader(IndexReader reader, int docBase) throws IOException { - resultsMulti.setNextReader(reader, start+docBase); - } - }; - } else { - // We must shift the docIDs - hc = new MultiReaderHitCollector() { - private int docBase; - public void collect(int doc, float score) { - results.collect(doc + docBase + start, score); - } + final Collector hc = new Collector() { + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } + public void collect(int doc) throws IOException { + collector.collect(doc); + } + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + collector.setNextReader(reader, start + docBase); + } + }; + + searchables[i].search(weight, filter, hc); + } + } - public void setNextReader(IndexReader reader, int docBase) { - this.docBase = docBase; - } - }; - } - - searchables[i].search(weight, filter, hc); - } - } - /* * TODO: this one could be parallelized too * @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query) Index: src/java/org/apache/lucene/search/QueryWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/QueryWrapperFilter.java (revision 759913) +++ src/java/org/apache/lucene/search/QueryWrapperFilter.java (working copy) @@ -50,9 +50,12 @@ public BitSet bits(IndexReader reader) throws IOException { final BitSet bits = new BitSet(reader.maxDoc()); - new IndexSearcher(reader).search(query, new MultiReaderHitCollector() { + new IndexSearcher(reader).search(query, new Collector() { private int base = -1; - public final void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException { + // score is not needed by this collector + } + public final void collect(int doc) { bits.set(doc + base); // set bit for hit } public void setNextReader(IndexReader reader, int docBase) { Index: src/java/org/apache/lucene/search/RemoteSearchable.java =================================================================== --- src/java/org/apache/lucene/search/RemoteSearchable.java (revision 759913) +++ src/java/org/apache/lucene/search/RemoteSearchable.java (working copy) @@ -45,12 +45,17 @@ this.local = local; } - + /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. 
*/ public void search(Weight weight, Filter filter, HitCollector results) throws IOException { local.search(weight, filter, results); } + public void search(Weight weight, Filter filter, Collector results) + throws IOException { + local.search(weight, filter, results); + } + public void close() throws IOException { local.close(); } Index: src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java =================================================================== --- src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java (revision 0) +++ src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java (revision 0) @@ -0,0 +1,83 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * A {@link Scorer} which wraps another scorer and caches the score of the + * current document. Successive calls to {@link #score()} will return the same + * result and will not invoke the wrapped Scorer's score() method, unless the + * current document has changed.
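+ *
+ * Typical usage would be to do the wrapping inside {@link Collector#setScorer(Scorer)}
+ * (or {@link FieldComparator#setScorer(Scorer)}). A minimal sketch, assuming the
+ * implementation keeps the wrapped scorer in a field for use in collect():
+ *
+ * <pre>
+ * public void setScorer(Scorer scorer) {
+ *   this.scorer = new ScoreCachingWrappingScorer(scorer);
+ *   // repeated this.scorer.score() calls for the same doc now hit the cache
+ * }
+ * </pre>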
+ * This class might be useful due to the changes done to the {@link Collector} + * interface, in which the score is not computed for a document by default, only + * if the collector requests it. Some collectors may need to use the score in + * several places, however all they have in hand is a {@link Scorer} object, and + * might end up computing the score of a document more than once. + */ +public class ScoreCachingWrappingScorer extends Scorer { + + private Scorer scorer; + private int curDoc = -1; + private float curScore; + + /** Creates a new instance by wrapping the given scorer. */ + public ScoreCachingWrappingScorer(Scorer scorer) { + super(scorer.getSimilarity()); + this.scorer = scorer; + } + + protected boolean score(Collector collector, int max) throws IOException { + return scorer.score(collector, max); + } + + public Similarity getSimilarity() { + return scorer.getSimilarity(); + } + + public Explanation explain(int doc) throws IOException { + return scorer.explain(doc); + } + + public float score() throws IOException { + int doc = doc(); + if (doc != curDoc) { + curScore = scorer.score(); + curDoc = doc; + } + + return curScore; + } + + public int doc() { + return scorer.doc(); + } + + public boolean next() throws IOException { + return scorer.next(); + } + + public void score(Collector collector) throws IOException { + scorer.score(collector); + } + + public boolean skipTo(int target) throws IOException { + return scorer.skipTo(target); + } + +} Index: src/java/org/apache/lucene/search/Scorer.java =================================================================== --- src/java/org/apache/lucene/search/Scorer.java (revision 759913) +++ src/java/org/apache/lucene/search/Scorer.java (working copy) @@ -52,10 +52,20 @@ * @param hc The collector to which all matching documents are passed through * {@link HitCollector#collect(int, float)}. *
When this method is used the {@link #explain(int)} method should not be used. + * @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + /** Scores and collects all matching documents. + * @param collector The collector to which all matching documents are passed. + *
When this method is used the {@link #explain(int)} method should not be used. + */ + public void score(Collector collector) throws IOException { + collector.setScorer(this); while (next()) { - hc.collect(doc(), score()); + collector.collect(doc()); } } @@ -66,10 +76,23 @@ * {@link HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. + * @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector hc, int max) throws IOException { + return score(new HitCollectorWrapper(hc), max); + } + + /** Expert: Collects matching documents in a range. Hook for optimization. + * Note that {@link #next()} must be called once before this method is called + * for the first time. + * @param collector The collector to which all matching documents are passed. + * @param max Do not score documents past this. + * @return true if more matching documents may remain. + */ + protected boolean score(Collector collector, int max) throws IOException { + collector.setScorer(this); while (doc() < max) { - hc.collect(doc(), score()); + collector.collect(doc()); if (!next()) return false; } Index: src/java/org/apache/lucene/search/Searchable.java =================================================================== --- src/java/org/apache/lucene/search/Searchable.java (revision 759913) +++ src/java/org/apache/lucene/search/Searchable.java (working copy) @@ -51,10 +51,32 @@ * @param filter if non-null, used to permit documents to be collected. * @param results to receive hits * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. */ void search(Weight weight, Filter filter, HitCollector results) throws IOException; + /** + * Lower-level search API. + * + *

+ * {@link Collector#collect(int)} is called for every matching document.
+ * Collector-based access to remote indexes is discouraged. + * + *

+ * Applications should only use this if they need all of the matching + * documents. The high-level search API ({@link Searcher#search(Query)}) is + * usually more efficient, as it skips non-high-scoring hits. + * + * @param weight + * to match documents + * @param filter + * if non-null, used to permit documents to be collected. + * @param results + * to receive hits + * @throws BooleanQuery.TooManyClauses + */ + void search(Weight weight, Filter filter, Collector collector) throws IOException; /** Frees resources associated with this Searcher. * Be careful not to call this method while you are still using objects @@ -147,6 +169,13 @@ * *

Applications should usually call {@link * Searcher#search(Query,Filter,Sort)} instead. + * + * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. + * * @throws BooleanQuery.TooManyClauses */ TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) Index: src/java/org/apache/lucene/search/Searcher.java =================================================================== --- src/java/org/apache/lucene/search/Searcher.java (revision 759913) +++ src/java/org/apache/lucene/search/Searcher.java (working copy) @@ -76,9 +76,13 @@ * the top n hits for query, applying * filter if non-null, and sorting the hits by the criteria in * sort. + * + * NOTE: currently, this method tracks document scores and sets them in + * the returned {@link FieldDoc}, however in 3.0 it will move to not track + * document scores. If document scores tracking is still needed, you can use + * {@link #search(Weight, Filter, Collector)} and pass in a + * {@link TopFieldCollector} instance. * - *

Applications should usually call {@link - * Searcher#search(Query,Filter,Sort)} instead. * @throws BooleanQuery.TooManyClauses */ public TopFieldDocs search(Query query, Filter filter, int n, @@ -99,6 +103,7 @@ * In other words, the score will not necessarily be a float whose value is * between 0 and 1. * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Query, Collector)} instead. */ public void search(Query query, HitCollector results) throws IOException { @@ -106,6 +111,24 @@ } /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching document. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query)}) is usually more efficient, as it skips + * non-high-scoring hits. + *

Note: The score passed to this method is a raw score. + * In other words, the score will not necessarily be a float whose value is + * between 0 and 1. + * @throws BooleanQuery.TooManyClauses + */ + public void search(Query query, Collector results) + throws IOException { + search(query, (Filter)null, results); + } + + /** Lower-level search API. * *

{@link HitCollector#collect(int,float)} is called for every matching * document. @@ -120,11 +143,33 @@ * @param filter if non-null, used to permit documents to be collected. * @param results to receive hits * @throws BooleanQuery.TooManyClauses + * @deprecated use {@link #search(Query, Filter, Collector)} instead. */ public void search(Query query, Filter filter, HitCollector results) throws IOException { search(createWeight(query), filter, results); } + + /** Lower-level search API. + * + *

{@link Collector#collect(int)} is called for every matching + * document. + *
Collector-based access to remote indexes is discouraged. + * + *

Applications should only use this if they need all of the + * matching documents. The high-level search API ({@link + * Searcher#search(Query, Filter, int)}) is usually more efficient, as it skips + * non-high-scoring hits. + * + * @param query to match documents + * @param filter if non-null, used to permit documents to be collected. + * @param results to receive hits + * @throws BooleanQuery.TooManyClauses + */ + public void search(Query query, Filter filter, Collector results) + throws IOException { + search(createWeight(query), filter, results); + } /** Finds the top n * hits for query, applying filter if non-null. @@ -197,7 +242,11 @@ /* The following abstract methods were added as a workaround for GCJ bug #15411. * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15411 */ + /** + * @deprecated use {@link #search(Weight, Filter, Collector)} instead. + */ abstract public void search(Weight weight, Filter filter, HitCollector results) throws IOException; + abstract public void search(Weight weight, Filter filter, Collector results) throws IOException; abstract public void close() throws IOException; abstract public int docFreq(Term term) throws IOException; abstract public int maxDoc() throws IOException; Index: src/java/org/apache/lucene/search/SortField.java =================================================================== --- src/java/org/apache/lucene/search/SortField.java (revision 759913) +++ src/java/org/apache/lucene/search/SortField.java (working copy) @@ -21,8 +21,6 @@ import java.io.Serializable; import java.util.Locale; -import org.apache.lucene.index.IndexReader; - /** * Stores information about how to sort documents by terms in an individual * field. Fields must be indexed in order to sort by them. @@ -434,8 +432,6 @@ /** Returns the {@link FieldComparator} to use for sorting. - * @param subReaders array of {@link IndexReader} search - * will step through * @param numHits number of top hits the queue will store * @param sortPos position of this SortField within {@link * Sort}. The comparator is primary if sortPos==0, @@ -444,7 +440,7 @@ * @param reversed True if the SortField is reversed * @return {@link FieldComparator} to use when sorting */ - protected FieldComparator getComparator(final IndexReader[] subReaders, final int numHits, final int sortPos, final boolean reversed) throws IOException { + protected FieldComparator getComparator(final int numHits, final int sortPos, final boolean reversed) throws IOException { if (locale != null) { // TODO: it'd be nice to allow FieldCache.getStringIndex @@ -480,7 +476,7 @@ case SortField.CUSTOM: assert factory == null && comparatorSource != null; - return comparatorSource.newComparator(field, subReaders, numHits, sortPos, reversed); + return comparatorSource.newComparator(field, numHits, sortPos, reversed); case SortField.STRING: return new FieldComparator.StringOrdValComparator(numHits, field, sortPos, reversed); Index: src/java/org/apache/lucene/search/TermScorer.java =================================================================== --- src/java/org/apache/lucene/search/TermScorer.java (revision 759913) +++ src/java/org/apache/lucene/search/TermScorer.java (working copy) @@ -24,6 +24,23 @@ /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { + + // Used internally in score(Collector, int) for setting the score of the current document. 
+ private static final class InternalScorer extends Scorer { + private int d; + private float score; + + public InternalScorer() { + super(null); + } + + public Explanation explain(int doc) throws IOException { return null; } + public float score() throws IOException { return score; } + public int doc() { return d; } + public boolean next() throws IOException { return false; } + public boolean skipTo(int target) throws IOException { return false; } + } + private Weight weight; private TermDocs termDocs; private byte[] norms; @@ -56,36 +73,52 @@ scoreCache[i] = getSimilarity().tf(i) * weightValue; } + /** @deprecated use {@link #score(Collector)} instead. */ public void score(HitCollector hc) throws IOException { + score(new HitCollectorWrapper(hc)); + } + + public void score(Collector c) throws IOException { next(); - score(hc, Integer.MAX_VALUE); + score(c, Integer.MAX_VALUE); } + /** @deprecated use {@link #score(Collector, int)} instead. */ protected boolean score(HitCollector c, int end) throws IOException { + return score(new HitCollectorWrapper(c), end); + } + + protected boolean score(Collector c, int end) throws IOException { Similarity similarity = getSimilarity(); // cache sim in local float[] normDecoder = Similarity.getNormDecoder(); + InternalScorer s = new InternalScorer(); + c.setScorer(s); while (doc < end) { // for docs in window int f = freqs[pointer]; float score = // compute tf(f)*weight f < SCORE_CACHE_SIZE // check cache - ? scoreCache[f] // cache hit - : similarity.tf(f)*weightValue; // cache miss - - score *= normDecoder[norms[doc] & 0xFF]; // normalize for field - - c.collect(doc, score); // collect score - - if (++pointer >= pointerMax) { - pointerMax = termDocs.read(docs, freqs); // refill buffers - if (pointerMax != 0) { - pointer = 0; - } else { - termDocs.close(); // close stream - doc = Integer.MAX_VALUE; // set to sentinel value - return false; - } - } - doc = docs[pointer]; + ? scoreCache[f] // cache hit + : similarity.tf(f)*weightValue; // cache miss + + score *= normDecoder[norms[doc] & 0xFF]; // normalize for field + + // Set the Scorer doc and score before calling collect in case it will be + // used in collect() + s.d = doc; + s.score = score; + c.collect(doc); // collect score + + if (++pointer >= pointerMax) { + pointerMax = termDocs.read(docs, freqs); // refill buffers + if (pointerMax != 0) { + pointer = 0; + } else { + termDocs.close(); // close stream + doc = Integer.MAX_VALUE; // set to sentinel value + return false; + } + } + doc = docs[pointer]; } return true; } Index: src/java/org/apache/lucene/search/TimeLimitedCollector.java =================================================================== --- src/java/org/apache/lucene/search/TimeLimitedCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/TimeLimitedCollector.java (working copy) @@ -28,7 +28,7 @@ * TimeExceeded Exception.

* */ -public class TimeLimitedCollector extends MultiReaderHitCollector { +public class TimeLimitedCollector extends Collector { /** * Default timer resolution. @@ -136,19 +136,25 @@ private final long t0; private final long timeout; - private final MultiReaderHitCollector hc; + private final Collector collector; /** * Create a TimeLimitedCollector wrapper over another HitCollector with a specified timeout. * @param hc the wrapped HitCollector * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown + * @deprecated use {@link #TimeLimitedCollector(Collector, long)} instead. */ public TimeLimitedCollector( final HitCollector hc, final long timeAllowed ) { - if (hc instanceof MultiReaderHitCollector) { - this.hc = (MultiReaderHitCollector) hc; - } else { - this.hc = new IndexSearcher.MultiReaderCollectorWrapper(hc); - } + this(new HitCollectorWrapper(hc), timeAllowed); + } + + /** + * Create a TimeLimitedCollector wrapper over another HitCollector with a specified timeout. + * @param hc the wrapped HitCollector + * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown + */ + public TimeLimitedCollector(final Collector collector, final long timeAllowed ) { + this.collector = collector; t0 = TIMER_THREAD.getMilliseconds(); this.timeout = t0 + timeAllowed; } @@ -158,18 +164,18 @@ * * @throws TimeExceededException if the time allowed has been exceeded. */ - public void collect( final int doc, final float score ) { + public void collect(final int doc) throws IOException { long time = TIMER_THREAD.getMilliseconds(); if( timeout < time) { if (greedy) { //System.out.println(this+" greedy: before failing, collecting doc: "+doc+" "+(time-t0)); - hc.collect( doc, score ); + collector.collect(doc); } //System.out.println(this+" failing on: "+doc+" "+(time-t0)); throw new TimeExceededException( timeout-t0, time-t0, doc ); } //System.out.println(this+" collecting: "+doc+" "+(time-t0)); - hc.collect( doc, score ); + collector.collect(doc); } /** @@ -220,6 +226,10 @@ } public void setNextReader(IndexReader reader, int base) throws IOException { - hc.setNextReader(reader, base); + collector.setNextReader(reader, base); } + + public void setScorer(Scorer scorer) throws IOException { + collector.setScorer(scorer); + } } Index: src/java/org/apache/lucene/search/TopDocs.java =================================================================== --- src/java/org/apache/lucene/search/TopDocs.java (revision 759913) +++ src/java/org/apache/lucene/search/TopDocs.java (working copy) @@ -29,7 +29,10 @@ /** Expert: Stores the maximum score value encountered, needed for normalizing. */ private float maxScore; - /** Expert: Returns the maximum score value encountered. */ + /** + * Expert: Returns the maximum score value encountered. Note that in case + * scores are not tracked, this returns {@link Float#NaN}. + */ public float getMaxScore() { return maxScore; } @@ -38,7 +41,12 @@ public void setMaxScore(float maxScore) { this.maxScore=maxScore; } - + + /** Expert: Constructs a TopDocs with a default maxScore=Float.NaN. 
*/ + TopDocs(int totalHits, ScoreDoc[] scoreDocs) { + this(totalHits, scoreDocs, Float.NaN); + } + /** Expert: Constructs a TopDocs.*/ public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) { this.totalHits = totalHits; Index: src/java/org/apache/lucene/search/TopDocsCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopDocsCollector.java (revision 0) +++ src/java/org/apache/lucene/search/TopDocsCollector.java (revision 0) @@ -0,0 +1,129 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.Collector; +import org.apache.lucene.util.PriorityQueue; + +/** + * A base class for all collectors that return a {@link TopDocs} output. This + * collector allows easy extension by providing a single constructor which + * accepts a {@link PriorityQueue} as well as protected members for that + * priority queue and a counter of the number of total hits.
+ * Extending classes can override {@link #topDocs(int, int)} and
+ * {@link #getTotalHits()} in order to provide their own implementation.
+ */
+public abstract class TopDocsCollector extends Collector {
+
+  // This is used in case topDocs() is called with illegal parameters, or there
+  // simply aren't (enough) results.
+  protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
+
+  /**
+   * The priority queue which holds the top documents. Note that different
+   * implementations of PriorityQueue give different meaning to 'top documents'.
+   * HitQueue for example aggregates the top scoring documents, while other PQ
+   * implementations may hold documents sorted by other criteria.
+   */
+  protected PriorityQueue pq;
+
+  /** The total number of documents that the collector encountered. */
+  protected int totalHits;
+
+  protected TopDocsCollector(PriorityQueue pq) {
+    this.pq = pq;
+  }
+
+  /**
+   * Populates the results array with the ScoreDoc instances. This can be
+   * overridden in case a different ScoreDoc type should be returned.
+   */
+  protected void populateResults(ScoreDoc[] results, int howMany) {
+    for (int i = howMany - 1; i >= 0; i--) {
+      results[i] = (ScoreDoc) pq.pop();
+    }
+  }
+
+  /**
+   * Returns a {@link TopDocs} instance containing the given results. If
+   * results is null it means there are no results to return,
+   * either because there were 0 calls to collect() or because the arguments to
+   * topDocs were invalid.
+   */
+  protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
+    return results == null ? EMPTY_TOPDOCS : new TopDocs(totalHits, results);
+  }
+
+  /** The total number of documents that matched this query. */
+  public int getTotalHits() {
+    return totalHits;
+  }
+
+  /** Returns the top docs that were collected by this collector. */
+  public final TopDocs topDocs() {
+    return topDocs(0, pq.size());
+  }
+
+  /**
+   * Returns the documents in the range [start .. pq.size()) that were collected
+   * by this collector. Note that if start >= pq.size(), an empty TopDocs is
+   * returned.
+   * This method is convenient to call if the application always asks for the
+   * last results, starting from the last 'page'.
+   */
+  public final TopDocs topDocs(int start) {
+    return topDocs(start, pq.size());
+  }
+
+  /**
+   * Returns the documents in the range [start .. start+howMany) that were
+   * collected by this collector. Note that if start >= pq.size(), an empty
+   * TopDocs is returned, and if pq.size() - start < howMany, then only the
+   * available documents in [start .. pq.size()) are returned.
+ * This method is useful to call in case pagination of search results is + * allowed by the search application, as well as it attempts to optimize the + * memory used by allocating only as much as requested by howMany. + */ + public TopDocs topDocs(int start, int howMany) { + + int pqsize = pq.size(); + + // Don't bother to throw an exception, just return an empty TopDocs in case + // the parameters are invalid or out of range. + if (start < 0 || start >= pqsize || howMany <= 0) { + return newTopDocs(null, start); + } + + // We know that start < pqsize, so just fix howMany. + howMany = Math.min(pqsize - start, howMany); + ScoreDoc[] results = new ScoreDoc[howMany]; + + // pq's pop() returns the 'least' element in the queue, therefore need + // to discard the first ones, until we reach the requested range. + // Note that this loop will usually not be executed, since the common usage + // should be that the caller asks for the last howMany results. However it's + // needed here for completeness. + for (int i = pqsize - start - howMany; i > 0; i--) { pq.pop(); } + + // Get the requested results from pq. + populateResults(results, howMany); + + return newTopDocs(results, start); + } + +} Index: src/java/org/apache/lucene/search/TopFieldCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopFieldCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/TopFieldCollector.java (working copy) @@ -21,201 +21,316 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldValueHitQueue.Entry; +import org.apache.lucene.util.PriorityQueue; /** - * A {@link HitCollector} that sorts by {@link SortField} using + * A {@link Collector} that sorts by {@link SortField} using * {@link FieldComparator}s. - * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. + * + * NOTE: This API is experimental and might change in incompatible ways + * in the next release. */ -public final class TopFieldCollector extends MultiReaderHitCollector { +public abstract class TopFieldCollector extends TopDocsCollector { + // TODO: one optimization we could do is to pre-fill + // the queue with sentinel value that guaranteed to + // always compare lower than a real hit; this would + // save having to check queueFull on each insert - private final FieldValueHitQueue queue; + /* + * Implements a TopFieldCollector over one SortField criteria, without + * tracking document scores. + */ + private static class OneComparatorNonScoringCollector extends TopFieldCollector { - private final FieldComparator[] comparators; - private FieldComparator comparator1; - private final int numComparators; - private int[] reverseMul; - private int reverseMul1 = 0; - - private final int numHits; - private int totalHits; - private FieldValueHitQueue.Entry bottom = null; - - /** Stores the maximum score value encountered, needed for normalizing. 
*/ - private float maxScore = Float.NEGATIVE_INFINITY; - - private boolean queueFull; - - private boolean fillFields; - - public TopFieldCollector(Sort sort, int numHits, IndexReader[] subReaders, boolean fillFields) - throws IOException { - - if (sort.fields.length == 0) { - throw new IllegalArgumentException("Sort must contain at least one field"); + final private FieldComparator comparator; + final private int reverseMul; + + public OneComparatorNonScoringCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + comparator = queue.getComparators()[0]; + reverseMul = queue.getReverseMul()[0]; } - - queue = new FieldValueHitQueue(sort.fields, numHits, subReaders); - comparators = queue.getComparators(); - reverseMul = queue.getReverseMul(); - numComparators = comparators.length; - - if (numComparators == 1) { - comparator1 = comparators[0]; - reverseMul1 = reverseMul[0]; - } else { - comparator1 = null; - reverseMul1 = 0; + + public void collect(int doc) throws IOException { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + final int cmp = reverseMul * comparator.compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) { + return; + } + + // This hit is competitive - replace bottom element in queue & adjustTop + comparator.copy(bottom.slot, doc); + updateBottom(doc, docScore); + comparator.setBottom(bottom.slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + final int slot = totalHits - 1; + // Copy hit into queue + comparator.copy(slot, doc); + add(slot, doc, docScore); + if (queueFull) { + comparator.setBottom(bottom.slot); + } + } } - this.numHits = numHits; - this.fillFields = fillFields; + + public void setNextReader(IndexReader reader, int docBase) throws IOException { + final int numSlotsFull = queueFull ? pq.size() : totalHits; + this.docBase = docBase; + comparator.setNextReader(reader, docBase, numSlotsFull); + } + + public void setScorer(Scorer scorer) throws IOException { + comparator.setScorer(scorer); + } + } - int currentDocBase; + /* + * Implements a TopFieldCollector over one SortField criteria, with tracking + * document scores. 
+ */ + private static class OneComparatorScoringCollector extends OneComparatorNonScoringCollector { - // javadoc inherited - public void setNextReader(IndexReader reader, int docBase) throws IOException { - final int numSlotsFull; - if (queueFull) - numSlotsFull = numHits; - else - numSlotsFull = totalHits; - - currentDocBase = docBase; - - for (int i = 0; i < numComparators; i++) { - comparators[i].setNextReader(reader, docBase, numSlotsFull); + private Scorer scorer; + + public OneComparatorScoringCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); } + + public void collect(int doc) throws IOException { + docScore = scorer.score(); + maxScore = Math.max(maxScore, docScore); + super.collect(doc); + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + super.setScorer(scorer); + } } - private final void updateBottom(int doc, float score) { - bottom.docID = currentDocBase + doc; - bottom.score = score; - queue.adjustTop(); - bottom = (FieldValueHitQueue.Entry) queue.top(); - } - - private final void add(int slot, int doc, float score) { - queue.put(new FieldValueHitQueue.Entry(slot, currentDocBase+doc, score)); - bottom = (FieldValueHitQueue.Entry) queue.top(); - queueFull = totalHits == numHits; - } - - // javadoc inherited - public void collect(int doc, float score) { - if (score > 0.0f) { - - maxScore = Math.max(maxScore, score); - totalHits++; - - // TODO: one optimization we could do is to pre-fill - // the queue with sentinel value that guaranteed to - // always compare lower than a real hit; this would - // save having to check queueFull on each insert - + /* + * Implements a TopFieldCollector over multiple SortField criteria, without + * tracking document scores. + */ + private static class MultiComparatorNonScoringCollector extends TopFieldCollector { + + final private FieldComparator[] comparators; + final private int[] reverseMul; + + public MultiComparatorNonScoringCollector(FieldValueHitQueue queue, + int numHits, boolean fillFields) throws IOException { + super(queue, numHits, fillFields); + comparators = queue.getComparators(); + reverseMul = queue.getReverseMul(); + } + + public void collect(int doc) throws IOException { + ++totalHits; if (queueFull) { - - if (numComparators == 1) { - // Common case - - // Fastmatch: return if this hit is not competitive - final int cmp = reverseMul1 * comparator1.compareBottom(doc, score); - if (cmp < 0) { + // Fastmatch: return if this hit is not competitive + for (int i = 0;; i++) { + final int c = reverseMul[i] * comparators[i].compareBottom(doc); + if (c < 0) { // Definitely not competitive return; - } else if (cmp == 0 && doc + currentDocBase > bottom.docID) { - // Definitely not competitive - return; - } - - // This hit is competitive -- replace bottom - // element in queue & adjustTop - comparator1.copy(bottom.slot, doc, score); - - updateBottom(doc, score); - - comparator1.setBottom(bottom.slot); - - } else { - - // Fastmatch: return if this hit is not competitive - for(int i=0;;i++) { - final int c = reverseMul[i] * comparators[i].compareBottom(doc, score); - if (c < 0) { + } else if (c > 0) { + // Definitely competitive + break; + } else if (i == comparators.length - 1) { + // This is the equals case. + if (doc + docBase > bottom.docID) { // Definitely not competitive return; - } else if (c > 0) { - // Definitely competitive - break; - } else if (i == numComparators-1) { - // This is the equals case. 
- if (doc + currentDocBase > bottom.docID) { - // Definitely not competitive - return; - } else { - break; - } } + break; } + } - // This hit is competitive -- replace bottom - // element in queue & adjustTop - for (int i = 0; i < numComparators; i++) { - comparators[i].copy(bottom.slot, doc, score); - } + // This hit is competitive - replace bottom element in queue & adjustTop + for (int i = 0; i < comparators.length; i++) { + comparators[i].copy(bottom.slot, doc); + } - updateBottom(doc, score); + updateBottom(doc, docScore); - for(int i=0;i= 0; i--) { - scoreDocs[i] = queue.fillFields((FieldValueHitQueue.Entry) queue.pop()); + results[i] = queue.fillFields((FieldValueHitQueue.Entry) queue.pop()); } } else { - Entry entry = (FieldValueHitQueue.Entry) queue.pop(); for (int i = queue.size() - 1; i >= 0; i--) { - scoreDocs[i] = new FieldDoc(entry.docID, - entry.score); + Entry entry = (FieldValueHitQueue.Entry) queue.pop(); + results[i] = new FieldDoc(entry.docID, entry.score); } } + } + + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + results = EMPTY_SCOREDOCS; + } - return new TopFieldDocs(totalHits, scoreDocs, queue.getFields(), maxScore); + return new TopFieldDocs(totalHits, results, ((FieldValueHitQueue) pq).getFields(), maxScore); } + } Index: src/java/org/apache/lucene/search/TopFieldDocCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopFieldDocCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/TopFieldDocCollector.java (working copy) @@ -30,8 +30,8 @@ * documents are collected. * * @deprecated Please use {@link TopFieldCollector} instead. - **/ -public class TopFieldDocCollector extends TopScoreDocCollector { + */ +public class TopFieldDocCollector extends TopDocCollector { private FieldDoc reusableFD; @@ -50,7 +50,7 @@ if (score > 0.0f) { totalHits++; if (reusableFD == null) - reusableFD = new FieldDoc(doc + docBase, score); + reusableFD = new FieldDoc(doc, score); else { // Whereas TopScoreDocCollector can skip this if the // score is not competitive, we cannot because the @@ -58,7 +58,7 @@ // aren't in general congruent with "higher score // wins" reusableFD.score = score; - reusableFD.doc = doc + docBase; + reusableFD.doc = doc; } reusableFD = (FieldDoc) hq.insertWithOverflow(reusableFD); } Index: src/java/org/apache/lucene/search/TopScoreDocCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopScoreDocCollector.java (revision 759913) +++ src/java/org/apache/lucene/search/TopScoreDocCollector.java (working copy) @@ -17,85 +17,71 @@ * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.PriorityQueue; -/** A {@link MultiReaderHitCollector} implementation that - * collects the top-scoring documents, returning them as a - * {@link TopDocs}. This is used by {@link IndexSearcher} - * to implement {@link TopDocs}-based search. - * - *

This may be extended, overriding the {@link - * MultiReaderHitCollector#collect} method to, e.g., - * conditionally invoke super() in order to - * filter which documents are collected, but sure you - * either take docBase into account, or also override - * {@link MultiReaderHitCollector#setNextReader} method. */ -public class TopScoreDocCollector extends MultiReaderHitCollector { +/** + * A {@link Collector} implementation that collects the top-scoring documents, + * returning them as a {@link TopDocs}. This is used by {@link IndexSearcher} to + * implement {@link TopDocs}-based search. + */ +public final class TopScoreDocCollector extends TopDocsCollector { private ScoreDoc reusableSD; - - /** The total number of hits the collector encountered. */ - protected int totalHits; - - /** The priority queue which holds the top-scoring documents. */ - protected PriorityQueue hq; - - protected int docBase = 0; + private int docBase = 0; + private Scorer scorer; /** Construct to collect a given number of hits. * @param numHits the maximum number of hits to collect */ public TopScoreDocCollector(int numHits) { - this(new HitQueue(numHits)); + super(new HitQueue(numHits)); } - /** Constructor to collect the top-scoring documents by using the given PQ. - * @param hq the PQ to use by this instance. - */ - protected TopScoreDocCollector(PriorityQueue hq) { - this.hq = hq; - } - - // javadoc inherited - public void collect(int doc, float score) { - if (score > 0.0f) { - totalHits++; - if (reusableSD == null) { - reusableSD = new ScoreDoc(doc + docBase, score); - } else if (score >= reusableSD.score) { - // reusableSD holds the last "rejected" entry, so, if - // this new score is not better than that, there's no - // need to try inserting it - reusableSD.doc = doc + docBase; - reusableSD.score = score; - } else { - return; - } - reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD); + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + return EMPTY_TOPDOCS; } - } - - /** The total number of documents that matched this query. */ - public int getTotalHits() { - return totalHits; - } - - /** The top-scoring hits. */ - public TopDocs topDocs() { - ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; - for (int i = hq.size()-1; i >= 0; i--) { // put docs in array - scoreDocs[i] = (ScoreDoc) hq.pop(); + + // We need to compute maxScore in order to set it in TopDocs. If start == 0, + // it means the largest element is already in results, use its score as + // maxScore. Otherwise pop everything else, until the largest element is + // extracted and use its score as maxScore. + float maxScore = Float.NaN; + if (start == 0) { + maxScore = results[0].score; + } else { + for (int i = pq.size(); i > 1; i--) { pq.pop(); } + maxScore = ((ScoreDoc) pq.pop()).score; } - - float maxScore = (totalHits==0) - ? 
Float.NEGATIVE_INFINITY - : scoreDocs[0].score; - return new TopDocs(totalHits, scoreDocs, maxScore); + return new TopDocs(totalHits, results, maxScore); } + // javadoc inherited + public void collect(int doc) throws IOException { + float score = scorer.score(); + totalHits++; + if (reusableSD == null) { + reusableSD = new ScoreDoc(doc + docBase, score); + } else if (score >= reusableSD.score) { + // reusableSD holds the last "rejected" entry, so, if + // this new score is not better than that, there's no + // need to try inserting it + reusableSD.doc = doc + docBase; + reusableSD.score = score; + } else { + return; + } + reusableSD = (ScoreDoc) pq.insertWithOverflow(reusableSD); + } + public void setNextReader(IndexReader reader, int base) { docBase = base; } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } } Index: src/test/org/apache/lucene/index/TestIndexReader.java =================================================================== --- src/test/org/apache/lucene/index/TestIndexReader.java (revision 759913) +++ src/test/org/apache/lucene/index/TestIndexReader.java (working copy) @@ -40,8 +40,9 @@ import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.AlreadyClosedException; @@ -1650,7 +1651,7 @@ Query q = new TermQuery(new Term("id", "a")); IndexSearcher s = new IndexSearcher(dir); - s.search(q, new MultiReaderHitCollector() { + s.search(q, new Collector() { int lastDocBase = -1; public void setNextReader(IndexReader reader, int docBase) { if (lastDocBase == -1) { @@ -1662,13 +1663,14 @@ } lastDocBase = docBase; } - public void collect(int doc, float score) {} + public void collect(int doc) {} + public void setScorer(Scorer scorer) {} }); s.close(); IndexReader r = IndexReader.open(dir); s = new IndexSearcher(r, true); - s.search(q, new MultiReaderHitCollector() { + s.search(q, new Collector() { int lastDocBase = -1; public void setNextReader(IndexReader reader, int docBase) { if (lastDocBase == -1) { @@ -1680,7 +1682,8 @@ } lastDocBase = docBase; } - public void collect(int doc, float score) {} + public void collect(int doc) {} + public void setScorer(Scorer scorer) {} }); s.close(); r.close(); Index: src/test/org/apache/lucene/index/TestOmitTf.java =================================================================== --- src/test/org/apache/lucene/index/TestOmitTf.java (revision 759913) +++ src/test/org/apache/lucene/index/TestOmitTf.java (working copy) @@ -17,6 +17,7 @@ * limitations under the License. 
*/ +import java.io.IOException; import java.util.Collection; import org.apache.lucene.util.LuceneTestCase; @@ -27,7 +28,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MultiReaderHitCollector; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; @@ -283,10 +285,15 @@ searcher.search(q1, new CountingHitCollector() { - public final void collect(int doc, float score) { + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Q1: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -294,10 +301,15 @@ searcher.search(q2, new CountingHitCollector() { - public final void collect(int doc, float score) { - //System.out.println("Q2: Doc=" + doc + " score=" + score); + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + //System.out.println("Q2: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f+doc); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -308,11 +320,16 @@ searcher.search(q3, new CountingHitCollector() { - public final void collect(int doc, float score) { + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Q1: Doc=" + doc + " score=" + score); + float score = scorer.score(); assertTrue(score==1.0f); assertFalse(doc%2==0); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -320,11 +337,16 @@ searcher.search(q4, new CountingHitCollector() { - public final void collect(int doc, float score) { + private Scorer scorer; + public final void setScorer(Scorer scorer) { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { + float score = scorer.score(); //System.out.println("Q1: Doc=" + doc + " score=" + score); assertTrue(score==1.0f); assertTrue(doc%2==0); - super.collect(doc, score); + super.collect(doc); } }); //System.out.println(CountingHitCollector.getCount()); @@ -337,9 +359,9 @@ searcher.search(bq, new CountingHitCollector() { - public final void collect(int doc, float score) { + public final void collect(int doc) throws IOException { //System.out.println("BQ: Doc=" + doc + " score=" + score); - super.collect(doc, score); + super.collect(doc); } }); assertTrue(15 == CountingHitCollector.getCount()); @@ -348,12 +370,13 @@ dir.close(); } - public static class CountingHitCollector extends MultiReaderHitCollector { + public static class CountingHitCollector extends Collector { static int count=0; static int sum=0; private int docBase = -1; CountingHitCollector(){count=0;sum=0;} - public void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException {} + public void collect(int doc) throws IOException { count++; sum += doc + docBase; // use it to avoid any possibility of being optimized 
away } Index: src/test/org/apache/lucene/search/CheckHits.java =================================================================== --- src/test/org/apache/lucene/search/CheckHits.java (revision 759913) +++ src/test/org/apache/lucene/search/CheckHits.java (working copy) @@ -89,9 +89,10 @@ } final Set actual = new TreeSet(); - searcher.search(query, new MultiReaderHitCollector() { + searcher.search(query, new Collector() { private int base = -1; - public void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException {} + public void collect(int doc) { actual.add(new Integer(doc + base)); } @@ -390,14 +391,22 @@ checkExplanations(query); return super.search(query,filter,n,sort); } + /** @deprecated use {@link #search(Query, Collector)} instead. */ public void search(Query query, HitCollector results) throws IOException { + search(query, new HitCollectorWrapper(results)); + } + public void search(Query query, Collector results) throws IOException { checkExplanations(query); - super.search(query,results); + super.search(query, results); } + /** @deprecated use {@link #search(Query, Filter, Collector)} instead. */ public void search(Query query, Filter filter, HitCollector results) throws IOException { + search(query, filter, new HitCollectorWrapper(results)); + } + public void search(Query query, Filter filter, Collector results) throws IOException { checkExplanations(query); - super.search(query,filter, results); + super.search(query, filter, results); } public TopDocs search(Query query, Filter filter, int n) throws IOException { @@ -416,7 +425,7 @@ * * @see CheckHits#verifyExplanation */ - public static class ExplanationAsserter extends MultiReaderHitCollector { + public static class ExplanationAsserter extends Collector { /** * @deprecated @@ -428,7 +437,8 @@ Searcher s; String d; boolean deep; - + + Scorer scorer; private int base = -1; /** Constructs an instance which does shallow tests on the Explanation */ @@ -441,8 +451,12 @@ this.d = q.toString(defaultFieldName); this.deep=deep; } - - public void collect(int doc, float score) { + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public void collect(int doc) throws IOException { Explanation exp = null; doc = doc + base; try { @@ -454,7 +468,7 @@ TestCase.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); - verifyExplanation(d,doc,score,deep,exp); + verifyExplanation(d,doc,scorer.score(),deep,exp); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; Index: src/test/org/apache/lucene/search/QueryUtils.java =================================================================== --- src/test/org/apache/lucene/search/QueryUtils.java (revision 759913) +++ src/test/org/apache/lucene/search/QueryUtils.java (working copy) @@ -153,10 +153,15 @@ final int[] sdoc = new int[] {-1}; final float maxDiff = 1e-5f; - s.search(q,new MultiReaderHitCollector() { + s.search(q,new Collector() { private int base = -1; - public void collect(int doc, float score) { + private Scorer sc; + public void setScorer(Scorer scorer) throws IOException { + this.sc = scorer; + } + public void collect(int doc) throws IOException { doc = doc + base; + float score = sc.score(); try { int op = order[(opidx[0]++)%order.length]; //System.out.println(op==skip_op ? 
"skip("+(sdoc[0]+1)+")":"next()"); @@ -205,11 +210,16 @@ //System.out.println("checkFirstSkipTo: "+q); final float maxDiff = 1e-5f; final int lastDoc[] = {-1}; - s.search(q,new MultiReaderHitCollector() { + s.search(q,new Collector() { private int base = -1; - public void collect(int doc, float score) { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public void collect(int doc) throws IOException { //System.out.println("doc="+doc); doc = doc + base; + float score = scorer.score(); try { for (int i=lastDoc[0]+1; i<=doc; i++) { Weight w = q.weight(s); Index: src/test/org/apache/lucene/search/TestDocBoost.java =================================================================== --- src/test/org/apache/lucene/search/TestDocBoost.java (revision 759913) +++ src/test/org/apache/lucene/search/TestDocBoost.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.*; @@ -66,11 +68,15 @@ new IndexSearcher(store).search (new TermQuery(new Term("field", "word")), - new MultiReaderHitCollector() { + new Collector() { private int base = -1; - public final void collect(int doc, float score) { - scores[doc + base] = score; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; } + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); + } public void setNextReader(IndexReader reader, int docBase) { base = docBase; } Index: src/test/org/apache/lucene/search/TestMultiTermConstantScore.java =================================================================== --- src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (revision 759913) +++ src/test/org/apache/lucene/search/TestMultiTermConstantScore.java (working copy) @@ -168,11 +168,15 @@ // must use a non score normalizing method for this. Query q = csrq("data", "1", "6", T, T); q.setBoost(100); - search.search(q, null, new MultiReaderHitCollector() { + search.search(q, null, new Collector() { private int base = -1; - public void collect(int doc, float score) { - assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, score); + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; } + public void collect(int doc) throws IOException { + assertEquals("score for doc " + (doc + base) + " was not correct", 1.0f, scorer.score()); + } public void setNextReader(IndexReader reader, int docBase) { base = docBase; } Index: src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java =================================================================== --- src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java (revision 0) +++ src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java (revision 0) @@ -0,0 +1,111 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.LuceneTestCase; + +public class TestScoreCachingWrappingScorer extends LuceneTestCase { + + private static final class SimpleScorer extends Scorer { + private int idx = 0; + private int doc = -1; + + public SimpleScorer() { + super(null); + } + + public Explanation explain(int doc) throws IOException { return null; } + + public float score() throws IOException { + // advance idx on purpose, so that consecutive calls to score will get + // different results. This is to emulate computation of a score. If + // ScoreCachingWrappingScorer is used, this should not be called more than + // once per document. + return idx == scores.length ? Float.NaN : scores[idx++]; + } + + public int doc() { return doc; } + + public boolean next() throws IOException { + return ++doc == scores.length; + } + + public boolean skipTo(int target) throws IOException { + doc = target; + return doc >= scores.length; + } + } + + private static final class ScoreCachingCollector extends Collector { + + private int idx = 0; + private Scorer scorer; + float[] mscores; + + public ScoreCachingCollector(int numToCollect) { + mscores = new float[numToCollect]; + } + + public void collect(int doc) throws IOException { + // just a sanity check to avoid IOOB. + if (idx == mscores.length) { + return; + } + + // just call score() a couple of times and record the score. + mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + mscores[idx] = scorer.score(); + ++idx; + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + } + + public void setScorer(Scorer scorer) throws IOException { + this.scorer = new ScoreCachingWrappingScorer(scorer); + } + + } + + private static final float[] scores = new float[] { 0.7767749f, 1.7839992f, + 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, 7.4950366f, 7.1490803f, + 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f }; + + public void testGetScores() throws Exception { + + Scorer s = new SimpleScorer(); + ScoreCachingCollector scc = new ScoreCachingCollector(scores.length); + scc.setScorer(s); + + // We need to iterate on the scorer so that its doc() advances. 
+ while (!s.next()) { + scc.collect(s.doc()); + } + + for (int i = 0; i < scores.length; i++) { + assertEquals(scores[i], scc.mscores[i], 0f); + } + + } + +} Index: src/test/org/apache/lucene/search/TestScorerPerf.java =================================================================== --- src/test/org/apache/lucene/search/TestScorerPerf.java (revision 759913) +++ src/test/org/apache/lucene/search/TestScorerPerf.java (working copy) @@ -96,12 +96,14 @@ return sets; } - public static class CountingHitCollector extends MultiReaderHitCollector { + public static class CountingHitCollector extends Collector { int count=0; int sum=0; protected int docBase = -1; - public void collect(int doc, float score) { + public void setScorer(Scorer scorer) throws IOException {} + + public void collect(int doc) { count++; sum += docBase+doc; // use it to avoid any possibility of being optimized away } @@ -123,11 +125,12 @@ } public void collect(int doc, float score) { + pos = answer.nextSetBit(pos+1); if (pos != doc + docBase) { throw new RuntimeException("Expected doc " + pos + " but got " + doc + docBase); } - super.collect(doc,score); + super.collect(doc); } } Index: src/test/org/apache/lucene/search/TestSetNorm.java =================================================================== --- src/test/org/apache/lucene/search/TestSetNorm.java (revision 759913) +++ src/test/org/apache/lucene/search/TestSetNorm.java (working copy) @@ -17,6 +17,8 @@ * limitations under the License. */ +import java.io.IOException; + import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.*; @@ -62,11 +64,15 @@ new IndexSearcher(store).search (new TermQuery(new Term("field", "word")), - new MultiReaderHitCollector() { + new Collector() { private int base = -1; - public final void collect(int doc, float score) { - scores[doc + base] = score; + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; } + public final void collect(int doc) throws IOException { + scores[doc + base] = scorer.score(); + } public void setNextReader(IndexReader reader, int docBase) { base = docBase; } Index: src/test/org/apache/lucene/search/TestSimilarity.java =================================================================== --- src/test/org/apache/lucene/search/TestSimilarity.java (revision 759913) +++ src/test/org/apache/lucene/search/TestSimilarity.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; import java.util.Collection; import org.apache.lucene.index.IndexReader; @@ -75,10 +76,14 @@ searcher.search (new TermQuery(b), - new MultiReaderHitCollector() { - public final void collect(int doc, float score) { - assertTrue(score == 1.0f); + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; } + public final void collect(int doc) throws IOException { + assertTrue(scorer.score() == 1.0f); + } public void setNextReader(IndexReader reader, int docBase) {} }); @@ -88,11 +93,15 @@ //System.out.println(bq.toString("field")); searcher.search (bq, - new MultiReaderHitCollector() { + new Collector() { private int base = -1; - public final void collect(int doc, float score) { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - 
assertTrue(score == (float)doc+base+1); + assertTrue(scorer.score() == (float)doc+base+1); } public void setNextReader(IndexReader reader, int docBase) { base = docBase; @@ -105,10 +114,14 @@ //System.out.println(pq.toString("field")); searcher.search (pq, - new MultiReaderHitCollector() { - public final void collect(int doc, float score) { + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 1.0f); + assertTrue(scorer.score() == 1.0f); } public void setNextReader(IndexReader reader, int docBase) {} }); @@ -117,10 +130,14 @@ //System.out.println(pq.toString("field")); searcher.search (pq, - new MultiReaderHitCollector() { - public final void collect(int doc, float score) { + new Collector() { + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + public final void collect(int doc) throws IOException { //System.out.println("Doc=" + doc + " score=" + score); - assertTrue(score == 2.0f); + assertTrue(scorer.score() == 2.0f); } public void setNextReader(IndexReader reader, int docBase) {} }); Index: src/test/org/apache/lucene/search/TestSort.java =================================================================== --- src/test/org/apache/lucene/search/TestSort.java (revision 759913) +++ src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -413,7 +413,7 @@ slotValues = new int[numHits]; } - public void copy(int slot, int doc, float score) { + public void copy(int slot, int doc) { slotValues[slot] = docValues[doc]; } @@ -421,7 +421,7 @@ return slotValues[slot1] - slotValues[slot2]; } - public int compareBottom(int doc, float score) { + public int compareBottom(int doc) { return bottomValue - docValues[doc]; } @@ -447,7 +447,7 @@ } static class MyFieldComparatorSource extends FieldComparatorSource { - public FieldComparator newComparator(String fieldname, IndexReader[] subReaders, int numHits, int sortPos, boolean reversed) { + public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) { return new MyFieldComparator(numHits); } } @@ -803,7 +803,27 @@ assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6); } + + public void testSortWithoutFillFields() throws Exception { + + // There was previously a bug in TopFieldCollector when fillFields was set + // to false - the same doc and score was set in ScoreDoc[] array. This test + // asserts that if fillFields is false, the documents are set properly. It + // does not use Searcher's default search methods (with Sort) since all set + // fillFields to true. 
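For reference, a sketch (not part of this test) of what fillFields buys a caller: when it is true, the ScoreDocs returned by topDocs() are FieldDoc instances whose fields array carries the values the hit was sorted by; when it is false, that array is left null. The searcher, query and "date" field names are assumptions:

    Sort byDate = new Sort(new SortField("date", SortField.STRING));
    TopDocsCollector collector = TopFieldCollector.create(byDate, 10,
        true /* fillFields */, false /* trackDocScores */);
    searcher.search(query, collector);
    FieldDoc top = (FieldDoc) collector.topDocs().scoreDocs[0];   // assumes at least one hit
    Object[] sortValues = top.fields;                             // one entry per SortField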
+ Sort sort = new Sort(); + int nDocs=10; + + TopDocsCollector tdc = TopFieldCollector.create(sort, nDocs, false, false); + + full.search(new MatchAllDocsQuery(), tdc); + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + for (int i = 1; i < sd.length; i++) { + assertTrue(sd[i].doc != sd[i - 1].doc); + } + } + // runs a variety of sorts useful for multisearchers private void runMultiSorts (Searcher multi) throws Exception { sort.setSort (SortField.FIELD_DOC); Index: src/test/org/apache/lucene/search/TestTermScorer.java =================================================================== --- src/test/org/apache/lucene/search/TestTermScorer.java (revision 759913) +++ src/test/org/apache/lucene/search/TestTermScorer.java (working copy) @@ -82,14 +82,18 @@ //must call next first - ts.score(new MultiReaderHitCollector() - { + ts.score(new Collector() { private int base = -1; - public void collect(int doc, float score) - { - docs.add(new TestHit(doc + base, score)); - assertTrue("score " + score + " is not greater than 0", score > 0); - assertTrue("Doc: " + doc + " does not equal: " + 0 + + private Scorer scorer; + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + public void collect(int doc) throws IOException { + float score = scorer.score(); + docs.add(new TestHit(doc + base, score)); + assertTrue("score " + score + " is not greater than 0", score > 0); + assertTrue("Doc: " + doc + " does not equal: " + 0 + " or doc does not equaal: " + 5, doc == 0 || doc == 5); } public void setNextReader(IndexReader reader, int docBase) { Index: src/test/org/apache/lucene/search/TestTimeLimitedCollector.java =================================================================== --- src/test/org/apache/lucene/search/TestTimeLimitedCollector.java (revision 759913) +++ src/test/org/apache/lucene/search/TestTimeLimitedCollector.java (working copy) @@ -101,7 +101,7 @@ iw.addDocument(d); } - private void search(HitCollector collector) throws Exception { + private void search(Collector collector) throws Exception { searcher.search(query, collector); } @@ -122,7 +122,7 @@ myHc = new MyHitCollector(); long oneHour = 3600000; - HitCollector tlCollector = createTimedCollector(myHc, oneHour, false); + Collector tlCollector = createTimedCollector(myHc, oneHour, false); search(tlCollector); totalTLCResults = myHc.hitCount(); } catch (Exception e) { @@ -132,7 +132,7 @@ assertEquals( "Wrong number of results!", totalResults, totalTLCResults ); } - private HitCollector createTimedCollector(MyHitCollector hc, long timeAllowed, boolean greedy) { + private Collector createTimedCollector(MyHitCollector hc, long timeAllowed, boolean greedy) { TimeLimitedCollector res = new TimeLimitedCollector(hc, timeAllowed); res.setGreedy(greedy); // set to true to make sure at least one doc is collected. return res; @@ -156,7 +156,7 @@ // setup MyHitCollector myHc = new MyHitCollector(); myHc.setSlowDown(SLOW_DOWN); - HitCollector tlCollector = createTimedCollector(myHc, TIME_ALLOWED, greedy); + Collector tlCollector = createTimedCollector(myHc, TIME_ALLOWED, greedy); // search TimeLimitedCollector.TimeExceededException timoutException = null; @@ -287,8 +287,7 @@ } // counting hit collector that can slow down at collect(). 
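MyHitCollector below is the collector these tests wrap; in application code the new constructor accepts any Collector directly. A sketch (the searcher, query and the ten second budget are assumptions):

    TopScoreDocCollector tsdc = new TopScoreDocCollector(10);
    Collector c = new TimeLimitedCollector(tsdc, 10 * 1000);   // allow ten seconds
    try {
      searcher.search(query, c);
    } catch (TimeLimitedCollector.TimeExceededException tee) {
      // ran out of time; tsdc still holds whatever was collected before the limit was hit
    }
    TopDocs hits = tsdc.topDocs();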
- private class MyHitCollector extends MultiReaderHitCollector - { + private class MyHitCollector extends Collector { private final BitSet bits = new BitSet(); private int slowdown = 0; private int lastDocCollected = -1; @@ -301,7 +300,11 @@ slowdown = milliseconds; } - public void collect( final int doc, final float score ) { + public void setScorer(Scorer scorer) throws IOException { + // scorer is not needed + } + + public void collect(final int doc) throws IOException { int docId = doc + docBase; if( slowdown > 0 ) { try { Index: src/test/org/apache/lucene/search/TestTopDocsCollector.java =================================================================== --- src/test/org/apache/lucene/search/TestTopDocsCollector.java (revision 0) +++ src/test/org/apache/lucene/search/TestTopDocsCollector.java (revision 0) @@ -0,0 +1,198 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestTopDocsCollector extends LuceneTestCase { + + private static final class MyTopsDocCollector extends TopDocsCollector { + + private int idx = 0; + private int base = -1; + + public MyTopsDocCollector(int size) { + super(new HitQueue(size)); + } + + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + if (results == null) { + return EMPTY_TOPDOCS; + } + + float maxScore = Float.NaN; + if (start == 0) { + maxScore = results[0].score; + } else { + for (int i = pq.size(); i > 1; i--) { pq.pop(); } + maxScore = ((ScoreDoc) pq.pop()).score; + } + + return new TopDocs(totalHits, results, maxScore); + } + + public void collect(int doc) throws IOException { + pq.insert(new ScoreDoc(doc + base, scores[idx++])); + } + + public void setNextReader(IndexReader reader, int docBase) + throws IOException { + base = docBase; + } + + public void setScorer(Scorer scorer) throws IOException { + // Don't do anything. Assign scores in random + } + + } + + // Scores array to be used by MyTopDocsCollector. If it is changed, MAX_SCORE + // must also change. 
+ private static final float[] scores = new float[] { + 0.7767749f, 1.7839992f, 8.9925785f, 7.9608946f, 0.07948637f, 2.6356435f, + 7.4950366f, 7.1490803f, 8.108544f, 4.961808f, 2.2423935f, 7.285586f, 4.6699767f, + 2.9655676f, 6.953706f, 5.383931f, 6.9916306f, 8.365894f, 7.888485f, 8.723962f, + 3.1796896f, 0.39971232f, 1.3077754f, 6.8489285f, 9.17561f, 5.060466f, 7.9793315f, + 8.601509f, 4.1858315f, 0.28146625f + }; + + private static final float MAX_SCORE = 9.17561f; + + private Directory dir = new RAMDirectory(); + + private TopDocsCollector doSearch(int numResults) throws IOException { + Query q = new MatchAllDocsQuery(); + IndexSearcher searcher = new IndexSearcher(dir); + TopDocsCollector tdc = new MyTopsDocCollector(numResults); + searcher.search(q, tdc); + searcher.close(); + return tdc; + } + + protected void setUp() throws Exception { + super.setUp(); + + // populate an index with 30 documents, this should be enough for the test. + // The documents have no content - the test uses MatchAllDocsQuery(). + IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(), MaxFieldLength.UNLIMITED); + for (int i = 0; i < 30; i++) { + writer.addDocument(new Document()); + } + writer.close(); + } + + protected void tearDown() throws Exception { + dir.close(); + dir = null; + super.tearDown(); + } + + public void testInvalidArguments() throws Exception { + int numResults = 5; + TopDocsCollector tdc = doSearch(numResults); + + // start < 0 + assertEquals(0, tdc.topDocs(-1).scoreDocs.length); + + // start > pq.size() + assertEquals(0, tdc.topDocs(numResults + 1).scoreDocs.length); + + // start == pq.size() + assertEquals(0, tdc.topDocs(numResults).scoreDocs.length); + + // howMany < 0 + assertEquals(0, tdc.topDocs(0, -1).scoreDocs.length); + + // howMany == 0 + assertEquals(0, tdc.topDocs(0, 0).scoreDocs.length); + + } + + public void testZeroResults() throws Exception { + TopDocsCollector tdc = new MyTopsDocCollector(5); + assertEquals(0, tdc.topDocs(0, 1).scoreDocs.length); + } + + public void testFirstResultsPage() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(10, tdc.topDocs(0, 10).scoreDocs.length); + } + + public void testSecondResultsPages() throws Exception { + TopDocsCollector tdc = doSearch(15); + // ask for more results than are available + assertEquals(5, tdc.topDocs(10, 10).scoreDocs.length); + + // ask for 5 results (exactly what there should be + tdc = doSearch(15); + assertEquals(5, tdc.topDocs(10, 5).scoreDocs.length); + + // ask for less results than there are + tdc = doSearch(15); + assertEquals(4, tdc.topDocs(10, 4).scoreDocs.length); + } + + public void testGetAllResults() throws Exception { + TopDocsCollector tdc = doSearch(15); + assertEquals(15, tdc.topDocs().scoreDocs.length); + } + + public void testGetResultsFromStart() throws Exception { + TopDocsCollector tdc = doSearch(15); + // should bring all results + assertEquals(15, tdc.topDocs(0).scoreDocs.length); + + tdc = doSearch(15); + // get the last 5 only. + assertEquals(5, tdc.topDocs(10).scoreDocs.length); + } + + public void testMaxScore() throws Exception { + // ask for all results + TopDocsCollector tdc = doSearch(15); + TopDocs td = tdc.topDocs(); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + + // ask for 5 last results + tdc = doSearch(15); + td = tdc.topDocs(10); + assertEquals(MAX_SCORE, td.getMaxScore(), 0f); + } + + // This does not test the PQ's correctness, but whether topDocs() + // implementations return the results in decreasing score order. 
+ public void testResultsOrder() throws Exception { + TopDocsCollector tdc = doSearch(15); + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + + assertEquals(MAX_SCORE, sd[0].score, 0f); + for (int i = 1; i < sd.length; i++) { + assertTrue(sd[i - 1].score >= sd[i].score); + } + } + +}
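A closing usage note for the TopDocsCollector API exercised above: paging through results amounts to collecting enough hits to cover the requested page and then slicing with topDocs(start, howMany). A sketch (the searcher, query and page numbers are assumptions, and error handling is omitted):

    int hitsPerPage = 10;
    int page = 2;                                        // zero-based, i.e. the third page
    TopScoreDocCollector collector = new TopScoreDocCollector((page + 1) * hitsPerPage);
    searcher.search(query, collector);
    TopDocs slice = collector.topDocs(page * hitsPerPage, hitsPerPage);
    ScoreDoc[] hits = slice.scoreDocs;                   // at most hitsPerPage docs, best first
    int totalMatches = collector.getTotalHits();         // all matches seen, not just this page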