+ TopFieldCollector tfc = TopFieldCollector.create(sort, numHits, fillFields,
+ true /* trackDocScores */);
+ searcher.search(weight, filter, tfc);
+ TopDocs results = tfc.topDocs();
+
+
+ Also, the method search(Weight, Filter, Collector) was added to
+ the Searchable interface and the Searcher abstract class, to
+ replace the deprecated HitCollector versions. If you either
+  implement Searchable or extend Searcher, you should change your
+ code to implement this method. If you already extend
+ IndexSearcher, no further changes are needed to use Collector.
+ (Shai Erera via Mike McCandless)
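+
+  A Searcher subclass can typically implement the new method by
+  delegating to the searcher it wraps (a minimal sketch; MySearcher
+  and the wrapped searcher are hypothetical):
+
+    public class MySearcher extends Searcher {
+      private final Searcher wrapped;
+      public MySearcher(Searcher wrapped) { this.wrapped = wrapped; }
+      public void search(Weight weight, Filter filter, Collector collector)
+          throws IOException {
+        wrapped.search(weight, filter, collector);
+      }
+      // ... the remaining abstract Searcher methods delegate likewise
+    }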
+
Changes in runtime behavior
1. LUCENE-1424: QueryParser now by default uses constant score query
@@ -10,7 +35,20 @@
already does so for RangeQuery, as well). Call
setConstantScoreRewrite(false) to revert to BooleanQuery rewriting
method. (Mark Miller via Mike McCandless)
+
+ 2. LUCENE-1575: As of 2.9, the core collectors, as well as
+    IndexSearcher's search methods that return top N results, no
+    longer filter out zero-scoring documents. If you rely on this
+    functionality, you can use PositiveScoresOnlyCollector like this:
+
+ TopDocsCollector tdc = new TopScoreDocCollector(10);
+ Collector c = new PositiveScoresOnlyCollector(tdc);
+ searcher.search(query, c);
+ TopDocs hits = tdc.topDocs();
+ ...
+
+
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
@@ -69,6 +107,14 @@
12. LUCENE-1500: Added new InvalidTokenOffsetsException to Highlighter methods
to denote issues when offsets in TokenStream tokens exceed the length of the
provided text. (Mark Harwood)
+
+13. LUCENE-1575: HitCollector is now deprecated in favor of a new
+ Collector abstract class. For easy migration, people can use
+ HitCollectorWrapper which translates (wraps) HitCollector into
+ Collector. Note that this class is also deprecated and will be
+ removed when HitCollector is removed. Also TimeLimitedCollector
+ is deprecated in favor of the new TimeLimitingCollector which
+ extends Collector. (Shai Erera via Mike McCandless)
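+
+    For example, an existing HitCollector can be migrated by wrapping
+    it (a minimal sketch; myHitCollector stands in for your legacy
+    collector):
+
+      Collector c = new HitCollectorWrapper(myHitCollector);
+      searcher.search(query, c);
+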
Bug fixes
@@ -240,6 +286,12 @@
those segments that did not change, and also speeds up searches
that sort by relevance or by field values. (Mark Miller, Mike
McCandless)
+
+ 7. LUCENE-1575: The new Collector class decouples collect() from
+ score computation. Collector.setScorer is called to establish the
+ current Scorer in-use per segment. Collectors that require the
+ score should then call Scorer.score() per hit inside
+ collect(). (Shai Erera via Mike McCandless)
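+
+    A score-aware Collector then follows this pattern (a minimal
+    sketch; ScoringCollector is a hypothetical example class):
+
+      public class ScoringCollector extends Collector {
+        private Scorer scorer;
+        public void setScorer(Scorer scorer) { this.scorer = scorer; }
+        public void collect(int doc) throws IOException {
+          float score = scorer.score(); // computed only when needed
+          ...
+        }
+        public void setNextReader(IndexReader reader, int docBase) {}
+      }
+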
Documentation
Index: src/test/org/apache/lucene/search/TestTimeLimitingCollector.java
===================================================================
--- src/test/org/apache/lucene/search/TestTimeLimitingCollector.java (revision 0)
+++ src/test/org/apache/lucene/search/TestTimeLimitingCollector.java (revision 0)
@@ -0,0 +1,337 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests the {@link TimeLimitingCollector}. This test checks (1) search
+ * correctness (regardless of timeout), (2) expected timeout behavior,
+ * and (3) a sanity test with multiple searching threads.
+ */
+public class TestTimeLimitingCollector extends LuceneTestCase {
+ private static final int SLOW_DOWN = 47;
+ private static final long TIME_ALLOWED = 17 * SLOW_DOWN; // so searches can find about 17 docs.
+
+ // max time allowed is relaxed for multithreading tests.
+ // the multithread case fails when setting this to 1 (no slack) and launching many threads (>2000).
+ // but this is not a real failure, just noise.
+ private static final double MULTI_THREAD_SLACK = 7;
+
+ private static final int N_DOCS = 3000;
+ private static final int N_THREADS = 50;
+
+ private Searcher searcher;
+ private final String FIELD_NAME = "body";
+ private Query query;
+
+ public TestTimeLimitingCollector(String name) {
+ super(name);
+ }
+
+ /**
+ * initializes searcher with a document set
+ */
+ protected void setUp() throws Exception {
+ final String docText[] = {
+ "docThatNeverMatchesSoWeCanRequireLastDocCollectedToBeGreaterThanZero",
+ "one blah three",
+ "one foo three multiOne",
+ "one foobar three multiThree",
+ "blueberry pancakes",
+ "blueberry pie",
+ "blueberry strudel",
+ "blueberry pizza",
+ };
+ Directory directory = new RAMDirectory();
+ IndexWriter iw = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED);
+
+    for (int i=0; i<N_DOCS; i++) {
...
Index: src/java/org/apache/lucene/search/FieldValueHitQueue.java
===================================================================
--- src/java/org/apache/lucene/search/FieldValueHitQueue.java	(revision 762823)
+++ src/java/org/apache/lucene/search/FieldValueHitQueue.java	(working copy)
...
 * Uses FieldCache.DEFAULT for maintaining
@@ -32,12 +32,12 @@
* NOTE: This API is experimental and might change in
* incompatible ways in the next release.
*
- * @since lucene 2.9
+ * @since 2.9
* @version $Id:
* @see Searcher#search(Query,Filter,int,Sort)
* @see FieldCache
*/
-public class FieldValueHitQueue extends PriorityQueue {
+public abstract class FieldValueHitQueue extends PriorityQueue {
final static class Entry {
int slot;
@@ -56,136 +56,185 @@
}
/**
- * Creates a hit queue sorted by the given list of fields.
- * @param fields SortField array we are sorting by in
- * priority order (highest priority first); cannot be null or empty
- * @param size The number of hits to retain. Must be
- * greater than zero.
- * @param subReaders Array of IndexReaders we will search,
- * in order that they will be searched
- * @throws IOException
+ * An implementation of {@link FieldValueHitQueue} which is optimized in case
+ * there is just one comparator.
*/
- public FieldValueHitQueue(SortField[] fields, int size, IndexReader[] subReaders) throws IOException {
- numComparators = fields.length;
- comparators = new FieldComparator[numComparators];
- reverseMul = new int[numComparators];
+ private static final class OneComparatorFieldValueHitQueue extends FieldValueHitQueue {
- if (fields.length == 0) {
- throw new IllegalArgumentException("Sort must contain at least one field");
- }
+ private final FieldComparator comparator;
+ private final int oneReverseMul;
+
+ public OneComparatorFieldValueHitQueue(SortField[] fields, int size)
+ throws IOException {
+ super(fields);
+ if (fields.length == 0) {
+ throw new IllegalArgumentException("Sort must contain at least one field");
+ }
- this.fields = fields;
-    for (int i=0; i<numComparators; ++i) {
...
+  /**
+   * Returns whether a is less relevant than b.
+ * @param a ScoreDoc
+ * @param b ScoreDoc
+ * @return true if document a should be sorted after document b.
+ */
+ protected boolean lessThan(final Object a, final Object b) {
+ final Entry hitA = (Entry) a;
+ final Entry hitB = (Entry) b;
+
+ assert hitA != hitB;
+ assert hitA.slot != hitB.slot;
+
+ final int c = oneReverseMul * comparator.compare(hitA.slot, hitB.slot);
+ if (c != 0) {
+ return c > 0;
+ }
+
+ // avoid random sort order that could lead to duplicates (bug #31241):
+ return hitA.docID > hitB.docID;
}
- initialize(size);
}
- /** Stores a comparator corresponding to each field being sorted by */
- private final FieldComparator[] comparators;
- private final FieldComparator comparator1;
- private final int numComparators;
- private final int[] reverseMul;
- private final int reverseMul1;
+ /**
+ * An implementation of {@link FieldValueHitQueue} which is optimized in case
+ * there is more than one comparator.
+ */
+ private static final class MultiComparatorsFieldValueHitQueue extends FieldValueHitQueue {
- FieldComparator[] getComparators() {
- return comparators;
- }
+ public MultiComparatorsFieldValueHitQueue(SortField[] fields, int size)
+ throws IOException {
+ super(fields);
- int[] getReverseMul() {
- return reverseMul;
- }
+ int numComparators = comparators.length;
+ for (int i = 0; i < numComparators; ++i) {
+ SortField field = fields[i];
- /** Stores the sort criteria being used. */
- private final SortField[] fields;
+ // AUTO is resolved before we are called
+ assert field.getType() != SortField.AUTO;
- /**
- * Returns whether a is less relevant than b.
- * @param a ScoreDoc
- * @param b ScoreDoc
- * @return true if document a should be sorted after document b.
- */
- protected boolean lessThan (final Object a, final Object b) {
- final Entry hitA = (Entry) a;
- final Entry hitB = (Entry) b;
+ reverseMul[i] = field.reverse ? -1 : 1;
+ comparators[i] = field.getComparator(size, i, field.reverse);
+ }
- assert hitA != hitB;
- assert hitA.slot != hitB.slot;
+ initialize(size);
+ }
+
+ protected boolean lessThan(Object a, Object b) {
+ final Entry hitA = (Entry) a;
+ final Entry hitB = (Entry) b;
- if (numComparators == 1) {
- // Common case
- final int c = reverseMul1 * comparator1.compare(hitA.slot, hitB.slot);
- if (c != 0) {
- return c > 0;
- }
- } else {
- // run comparators
-      for (int i=0; i<numComparators; ++i) {
...
+  /**
+   * Creates a hit queue sorted by the given list of fields.
+   * @param fields
+   *          SortField array we are sorting by in priority order (highest
+   *          priority first); cannot be null or empty
+ * @param size
+ * The number of hits to retain. Must be greater than zero.
+ * @throws IOException
+ */
+ public static FieldValueHitQueue create(SortField[] fields, int size) throws IOException {
+
+ if (fields.length == 0) {
+ throw new IllegalArgumentException("Sort must contain at least one field");
+ }
+
+ if (fields.length == 1) {
+ return new OneComparatorFieldValueHitQueue(fields, size);
+ } else {
+ return new MultiComparatorsFieldValueHitQueue(fields, size);
+ }
}
+
+ FieldComparator[] getComparators() { return comparators; }
+ int[] getReverseMul() { return reverseMul; }
+ /** Stores the sort criteria being used. */
+ protected final SortField[] fields;
+ protected final FieldComparator[] comparators;
+ protected final int[] reverseMul;
+
+ protected abstract boolean lessThan (final Object a, final Object b);
+
/**
- * Given a FieldDoc object, stores the values used
- * to sort the given document. These values are not the raw
- * values out of the index, but the internal representation
- * of them. This is so the given search hit can be collated
- * by a MultiSearcher with other search hits.
- * @param doc The FieldDoc to store sort values into.
- * @return The same FieldDoc passed in.
+ * Given a FieldDoc object, stores the values used to sort the given document.
+ * These values are not the raw values out of the index, but the internal
+ * representation of them. This is so the given search hit can be collated by
+ * a MultiSearcher with other search hits.
+ *
+ * @param doc
+ * The FieldDoc to store sort values into.
+ * @return The same FieldDoc passed in.
* @see Searchable#search(Weight,Filter,int,Sort)
*/
- FieldDoc fillFields (final Entry entry) {
+ FieldDoc fillFields(final Entry entry) {
final int n = comparators.length;
final Comparable[] fields = new Comparable[n];
-    for (int i=0; i<n; ++i)
...
Index: src/java/org/apache/lucene/search/TopScoreDocCollector.java
===================================================================
--- src/java/org/apache/lucene/search/TopScoreDocCollector.java	(revision 762823)
+++ src/java/org/apache/lucene/search/TopScoreDocCollector.java	(working copy)
...
- * This may be extended, overriding the {@link
- * MultiReaderHitCollector#collect} method to, e.g.,
- * conditionally invoke super() in order to
- * filter which documents are collected, but sure you
- * either take docBase into account, or also override
- * {@link MultiReaderHitCollector#setNextReader} method. */
-public class TopScoreDocCollector extends MultiReaderHitCollector {
+/**
+ * A {@link Collector} implementation that collects the
+ * top-scoring hits, returning them as a {@link
+ * TopDocs}. This is used by {@link IndexSearcher} to
+ * implement {@link TopDocs}-based search. Hits are sorted
+ * by score descending and then (when the scores are tied)
+ * docID ascending.
+ */
+public final class TopScoreDocCollector extends TopDocsCollector {
private ScoreDoc reusableSD;
-
- /** The total number of hits the collector encountered. */
- protected int totalHits;
-
- /** The priority queue which holds the top-scoring documents. */
- protected PriorityQueue hq;
-
- protected int docBase = 0;
+ private int docBase = 0;
+ private Scorer scorer;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopScoreDocCollector(int numHits) {
- this(new HitQueue(numHits));
+ super(new HitQueue(numHits));
}
- /** Constructor to collect the top-scoring documents by using the given PQ.
- * @param hq the PQ to use by this instance.
- */
- protected TopScoreDocCollector(PriorityQueue hq) {
- this.hq = hq;
- }
-
- // javadoc inherited
- public void collect(int doc, float score) {
- if (score > 0.0f) {
- totalHits++;
- if (reusableSD == null) {
- reusableSD = new ScoreDoc(doc + docBase, score);
- } else if (score >= reusableSD.score) {
- // reusableSD holds the last "rejected" entry, so, if
- // this new score is not better than that, there's no
- // need to try inserting it
- reusableSD.doc = doc + docBase;
- reusableSD.score = score;
- } else {
- return;
- }
- reusableSD = (ScoreDoc) hq.insertWithOverflow(reusableSD);
+ protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
+ if (results == null) {
+ return EMPTY_TOPDOCS;
}
- }
-
- /** The total number of documents that matched this query. */
- public int getTotalHits() {
- return totalHits;
- }
-
- /** The top-scoring hits. */
- public TopDocs topDocs() {
- ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
- for (int i = hq.size()-1; i >= 0; i--) { // put docs in array
- scoreDocs[i] = (ScoreDoc) hq.pop();
+
+ // We need to compute maxScore in order to set it in TopDocs. If start == 0,
+ // it means the largest element is already in results, use its score as
+ // maxScore. Otherwise pop everything else, until the largest element is
+ // extracted and use its score as maxScore.
+ float maxScore = Float.NaN;
+ if (start == 0) {
+ maxScore = results[0].score;
+ } else {
+ for (int i = pq.size(); i > 1; i--) { pq.pop(); }
+ maxScore = ((ScoreDoc) pq.pop()).score;
}
-
- float maxScore = (totalHits==0)
- ? Float.NEGATIVE_INFINITY
- : scoreDocs[0].score;
- return new TopDocs(totalHits, scoreDocs, maxScore);
+ return new TopDocs(totalHits, results, maxScore);
}
+ // javadoc inherited
+ public void collect(int doc) throws IOException {
+ float score = scorer.score();
+ totalHits++;
+ if (reusableSD == null) {
+ reusableSD = new ScoreDoc(doc + docBase, score);
+ } else if (score >= reusableSD.score) {
+ // reusableSD holds the last "rejected" entry, so, if
+ // this new score is not better than that, there's no
+ // need to try inserting it
+ reusableSD.doc = doc + docBase;
+ reusableSD.score = score;
+ } else {
+ return;
+ }
+ reusableSD = (ScoreDoc) pq.insertWithOverflow(reusableSD);
+ }
+
public void setNextReader(IndexReader reader, int base) {
docBase = base;
}
+
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
}
Index: src/java/org/apache/lucene/search/Searcher.java
===================================================================
--- src/java/org/apache/lucene/search/Searcher.java (revision 762823)
+++ src/java/org/apache/lucene/search/Searcher.java (working copy)
@@ -76,9 +76,13 @@
* the top n hits for query, applying
* filter if non-null, and sorting the hits by the criteria in
* sort.
+ *
+ * NOTE: currently, this method tracks document scores and sets them in
+ * the returned {@link FieldDoc}, however in 3.0 it will move to not track
+ * document scores. If document scores tracking is still needed, you can use
+ * {@link #search(Weight, Filter, Collector)} and pass in a
+ * {@link TopFieldCollector} instance.
*
-   *
-   * Applications should usually call {@link
-   * Searcher#search(Query,Filter,Sort)} instead.
    * @throws BooleanQuery.TooManyClauses
    */
  public TopFieldDocs search(Query query, Filter filter, int n,
@@ -99,6 +103,7 @@
    * In other words, the score will not necessarily be a float whose value is
    * between 0 and 1.
    * @throws BooleanQuery.TooManyClauses
+   * @deprecated use {@link #search(Query, Collector)} instead.
    */
  public void search(Query query, HitCollector results)
    throws IOException {
@@ -106,6 +111,24 @@
  }

  /** Lower-level search API.
+   *
+   * {@link Collector#collect(int)} is called for every matching document.
+   *
+   * Applications should only use this if they need all of the
+   * matching documents. The high-level search API ({@link
+   * Searcher#search(Query)}) is usually more efficient, as it skips
+   * non-high-scoring hits.
+   * Note: The score passed to this method is a raw score.
+ * In other words, the score will not necessarily be a float whose value is
+ * between 0 and 1.
+ * @throws BooleanQuery.TooManyClauses
+ */
+ public void search(Query query, Collector results)
+ throws IOException {
+ search(query, (Filter)null, results);
+ }
+
+ /** Lower-level search API.
*
*
   * {@link HitCollector#collect(int,float)} is called for every matching
   * document.
@@ -120,11 +143,33 @@
    * @param filter if non-null, used to permit documents to be collected.
    * @param results to receive hits
    * @throws BooleanQuery.TooManyClauses
+   * @deprecated use {@link #search(Query, Filter, Collector)} instead.
    */
  public void search(Query query, Filter filter, HitCollector results)
    throws IOException {
    search(createWeight(query), filter, results);
  }
+
+  /** Lower-level search API.
+   *
+   * {@link Collector#collect(int)} is called for every matching
+   * document.
+   *
+   * Collector-based access to remote indexes is discouraged.
+ *
+ *
Applications should only use this if they need all of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query, Filter, int)}) is usually more efficient, as it skips
+ * non-high-scoring hits.
+ *
+ * @param query to match documents
+ * @param filter if non-null, used to permit documents to be collected.
+ * @param results to receive hits
+ * @throws BooleanQuery.TooManyClauses
+ */
+ public void search(Query query, Filter filter, Collector results)
+ throws IOException {
+ search(createWeight(query), filter, results);
+ }
/** Finds the top n
* hits for query, applying filter if non-null.
@@ -197,7 +242,11 @@
/* The following abstract methods were added as a workaround for GCJ bug #15411.
* http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15411
*/
+ /**
+ * @deprecated use {@link #search(Weight, Filter, Collector)} instead.
+ */
abstract public void search(Weight weight, Filter filter, HitCollector results) throws IOException;
+ abstract public void search(Weight weight, Filter filter, Collector results) throws IOException;
abstract public void close() throws IOException;
abstract public int docFreq(Term term) throws IOException;
abstract public int maxDoc() throws IOException;
Index: src/java/org/apache/lucene/search/Collector.java
===================================================================
--- src/java/org/apache/lucene/search/Collector.java (revision 0)
+++ src/java/org/apache/lucene/search/Collector.java (revision 0)
@@ -0,0 +1,160 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ *
+ * Expert: Collectors are primarily meant to be used to
+ * gather raw results from a search, and implement sorting
+ * or custom result filtering, collation, etc.
+ *
+ * As of 2.9, this class replaces the deprecated
+ * HitCollector, and offers an API for efficient collection
+ * of hits across sequential {@link IndexReader}s. {@link
+ * IndexSearcher} advances the collector through each of the
+ * sub readers, in an arbitrary order. This results in a
+ * higher performance means of collection.
+ *
+ * Lucene's core collectors are derived from Collector.
+ * Likely your application can use one of these classes, or
+ * subclass {@link TopDocsCollector}, instead of
+ * implementing Collector directly:
+ *
+ * Collector decouples the score from the collected doc:
+ * the score computation is skipped entirely if it's not
+ * needed. Collectors that do need the score should
+ * implement the {@link #setScorer} method, to hold onto the
+ * passed {@link Scorer} instance, and call {@link
+ * Scorer#score()} within the collect method to compute the
+ * current hit's score. If your collector may request the
+ * score for a single hit multiple times, you should use
+ * {@link ScoreCachingWrappingScorer}.
+ *
+ * NOTE: The doc that is passed to the collect
+ * method is relative to the current reader. If your
+ * collector needs to resolve this to the docID space of the
+ * Multi*Reader, you must re-base it by recording the
+ * docBase from the most recent setNextReader call. Here's
+ * a simple example showing how to collect docIDs into a
+ * BitSet:
+ * + *
+ * Searcher searcher = new IndexSearcher(indexReader);
+ * final BitSet bits = new BitSet(indexReader.maxDoc());
+ * searcher.search(query, new Collector() {
+ * private int docBase;
+ *
+ * // ignore scorer
+ * public void setScorer(Scorer scorer) {
+ * }
+ *
+ * public void collect(int doc) {
+ * bits.set(doc + docBase);
+ * }
+ *
+ * public void setNextReader(IndexReader reader, int docBase) {
+ * this.docBase = docBase;
+ * }
+ * });
+ *
+ *
+ * Not all collectors will need to rebase the docID.  For
+ * example, a collector that simply counts the total number
+ * of hits would skip it.
+ *
+ * NOTE: Prior to 2.9, Lucene silently filtered
+ * out hits with score <= 0.  As of 2.9, the core Collectors
+ * no longer do that.  It's very unusual to have such hits
+ * (a negative query boost, or function query returning
+ * negative custom scores, could cause it to happen).  If
+ * you need that behavior, use {@link
+ * PositiveScoresOnlyCollector}.
+ *
+ * NOTE: This API is experimental and might change
+ * in incompatible ways in the next release.
+ */
+public abstract class Collector {
+
+  /**
+   * Called before successive calls to {@link #collect(int)}. Implementations
+   * that need the score of the current document (passed-in to
+   * {@link #collect(int)}), should save the passed-in Scorer and call
+   * scorer.score() when needed.
+   */
+  public abstract void setScorer(Scorer scorer) throws IOException;
+
+  /**
+   * Called once for every document matching a query, with the unbased document
+   * number.
+   *
+   * Note: This is called in an inner search loop. For good search performance,
+   * implementations of this method should not call {@link Searcher#doc(int)} or
+   * {@link org.apache.lucene.index.IndexReader#document(int)} on every hit.
+   * Doing so can slow searches by an order of magnitude or more.
+   */
+  public abstract void collect(int doc) throws IOException;
+
+  /**
+   * Called before collecting from each IndexReader. All doc ids in
+   * {@link #collect(int)} will correspond to reader.
+   *
+   * Add docBase to the current IndexReaders internal document id to re-base ids
+   * in {@link #collect(int)}.
+   *
+   * @param reader
+   *          next IndexReader
+   * @param docBase
+   */
+  public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
+
+}
Index: src/java/org/apache/lucene/search/Searchable.java
===================================================================
--- src/java/org/apache/lucene/search/Searchable.java	(revision 762823)
+++ src/java/org/apache/lucene/search/Searchable.java	(working copy)
@@ -19,7 +19,7 @@

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader;   // for javadoc
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.CorruptIndexException;

@@ -51,10 +51,32 @@
    * @param filter if non-null, used to permit documents to be collected.
    * @param results to receive hits
    * @throws BooleanQuery.TooManyClauses
+   * @deprecated use {@link #search(Weight, Filter, Collector)} instead.
    */
  void search(Weight weight, Filter filter, HitCollector results) throws IOException;

+  /**
+   * Lower-level search API.
+   *
+ * {@link Collector#collect(int)} is called for every document.
+ * Collector-based access to remote indexes is discouraged.
+ *
+ *
+ * Applications should only use this if they need all of the matching
+ * documents. The high-level search API ({@link Searcher#search(Query)}) is
+ * usually more efficient, as it skips non-high-scoring hits.
+ *
+ * @param weight
+ * to match documents
+ * @param filter
+ * if non-null, used to permit documents to be collected.
+ * @param collector
+ * to receive hits
+ * @throws BooleanQuery.TooManyClauses
+ */
+ void search(Weight weight, Filter filter, Collector collector) throws IOException;
/** Frees resources associated with this Searcher.
* Be careful not to call this method while you are still using objects
@@ -140,6 +162,7 @@
*/
Explanation explain(Weight weight, int doc) throws IOException;
+ // TODO: change the javadoc in 3.0 to remove the last NOTE section.
/** Expert: Low-level search implementation with arbitrary sorting. Finds
* the top n hits for query, applying
* filter if non-null, and sorting the hits by the criteria in
@@ -147,6 +170,13 @@
*
*
   * Applications should usually call {@link
* Searcher#search(Query,Filter,Sort)} instead.
+ *
+ * NOTE: currently, this method tracks document scores and sets them in
+ * the returned {@link FieldDoc}, however in 3.0 it will move to not track
+ * document scores. If document scores tracking is still needed, you can use
+ * {@link #search(Weight, Filter, Collector)} and pass in a
+ * {@link TopFieldCollector} instance.
+ *
* @throws BooleanQuery.TooManyClauses
*/
TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort)
Index: src/java/org/apache/lucene/search/MultiReaderHitCollector.java
===================================================================
--- src/java/org/apache/lucene/search/MultiReaderHitCollector.java (revision 762802)
+++ src/java/org/apache/lucene/search/MultiReaderHitCollector.java (working copy)
@@ -1,53 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-
-/**
- * Expert: A HitCollector that can be used to collect hits
- * across sequential IndexReaders. For a Multi*Reader, this
- * collector advances through each of the sub readers, in an
- * arbitrary order. This results in a higher performance
- * means of collection.
- *
- * NOTE: The doc that is passed to the collect method
- * is relative to the current reader. You must re-base the
- * doc, by recording the docBase from the last setNextReader
- * call, to map it to the docID space of the
- * Multi*Reader.
- *
- * NOTE: This API is experimental and might change in
- * incompatible ways in the next release.
- */
-public abstract class MultiReaderHitCollector extends HitCollector {
- /**
- * Called before collecting from each IndexReader. All doc
- * ids in {@link #collect(int, float)} will correspond to reader.
- *
- * Add docBase to the current IndexReaders internal document id to
- * re-base ids in {@link #collect(int, float)}.
- *
- * @param reader next IndexReader
- * @param docBase
- * @throws IOException
- */
- public abstract void setNextReader(IndexReader reader, int docBase) throws IOException;
-}
Index: src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java
===================================================================
--- src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java (revision 0)
+++ src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java (revision 0)
@@ -0,0 +1,56 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * A {@link Collector} implementation which wraps another
+ * {@link Collector} and makes sure only documents with
+ * scores > 0 are collected.
+ */
+
+public class PositiveScoresOnlyCollector extends Collector {
+
+ final private Collector c;
+ private Scorer scorer;
+
+ public PositiveScoresOnlyCollector(Collector c) {
+ this.c = c;
+ }
+
+ public void collect(int doc) throws IOException {
+ if (scorer.score() > 0) {
+ c.collect(doc);
+ }
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ c.setNextReader(reader, docBase);
+ }
+
+ public void setScorer(Scorer scorer) throws IOException {
+ // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call
+ // score() also.
+ this.scorer = new ScoreCachingWrappingScorer(scorer);
+ c.setScorer(this.scorer);
+ }
+
+}
Index: src/java/org/apache/lucene/search/HitCollector.java
===================================================================
--- src/java/org/apache/lucene/search/HitCollector.java (revision 762823)
+++ src/java/org/apache/lucene/search/HitCollector.java (working copy)
@@ -17,14 +17,7 @@
* limitations under the License.
*/
-/** Lower-level search API.
- *
- * HitCollectors are primarily meant to be used to implement queries,
- * sorting and filtering. See {@link
- * MultiReaderHitCollector} for a lower level and
- * higher performance (on a multi-segment index) API.
- * @see Searcher#search(Query,HitCollector)
- * @version $Id$
- */
+/** @deprecated Please use {@link Collector} instead. */
public abstract class HitCollector {
/** Called once for every document matching a query, with the document
* number and its raw score.
Index: src/java/org/apache/lucene/search/TimeLimitingCollector.java
===================================================================
--- src/java/org/apache/lucene/search/TimeLimitingCollector.java (revision 0)
+++ src/java/org/apache/lucene/search/TimeLimitingCollector.java (revision 0)
@@ -0,0 +1,219 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * The {@link TimeLimitingCollector} is used to timeout search requests that
+ * take longer than the maximum allowed search time limit. After this time is
+ * exceeded, the search thread is stopped by throwing a
+ * {@link TimeExceededException}.
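+ *
+ * A typical usage sketch (query and maxTimeMillis are caller-supplied;
+ * any Collector can be wrapped, e.g. a TopScoreDocCollector):
+ *
+ *   TopDocsCollector tdc = new TopScoreDocCollector(10);
+ *   Collector c = new TimeLimitingCollector(tdc, maxTimeMillis);
+ *   try {
+ *     searcher.search(query, c);
+ *   } catch (TimeExceededException tee) {
+ *     // search timed out; tdc holds the hits collected so far
+ *   }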
+ */
+public class TimeLimitingCollector extends Collector {
+
+ /**
+ * Default timer resolution.
+ * @see #setResolution(long)
+ */
+ public static final int DEFAULT_RESOLUTION = 20;
+
+ /**
+ * Default for {@link #isGreedy()}.
+ * @see #isGreedy()
+ */
+  public static final boolean DEFAULT_GREEDY = false;
+
+ private static long resolution = DEFAULT_RESOLUTION;
+
+  private boolean greedy = DEFAULT_GREEDY;
+
+ private static final class TimerThread extends Thread {
+
+ // NOTE: we can avoid explicit synchronization here for several reasons:
+ // * updates to volatile long variables are atomic
+ // * only single thread modifies this value
+ // * use of volatile keyword ensures that it does not reside in
+ // a register, but in main memory (so that changes are visible to
+ // other threads).
+  //   * visibility of changes does not need to be instantaneous, we can
+ // afford losing a tick or two.
+ //
+ // See section 17 of the Java Language Specification for details.
+ private volatile long time = 0;
+
+ /**
+ * TimerThread provides a pseudo-clock service to all searching
+ * threads, so that they can count elapsed time with less overhead
+ * than repeatedly calling System.currentTimeMillis. A single
+ * thread should be created to be used for all searches.
+ */
+ private TimerThread() {
+ super("TimeLimitedCollector timer thread");
+ this.setDaemon( true );
+ }
+
+ public void run() {
+ while (true) {
+ // TODO: Use System.nanoTime() when Lucene moves to Java SE 5.
+ time += resolution;
+ try {
+ Thread.sleep( resolution );
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(ie);
+ }
+ }
+ }
+
+ /**
+ * Get the timer value in milliseconds.
+ */
+ public long getMilliseconds() {
+ return time;
+ }
+ }
+
+ /** Thrown when elapsed search time exceeds allowed search time. */
+ public static class TimeExceededException extends RuntimeException {
+ private long timeAllowed;
+ private long timeElapsed;
+ private int lastDocCollected;
+ private TimeExceededException(long timeAllowed, long timeElapsed, int lastDocCollected) {
+      super("Elapsed time: " + timeElapsed + ". Exceeded allowed search time: " + timeAllowed + " ms.");
+ this.timeAllowed = timeAllowed;
+ this.timeElapsed = timeElapsed;
+ this.lastDocCollected = lastDocCollected;
+ }
+ /** Returns allowed time (milliseconds). */
+ public long getTimeAllowed() {
+ return timeAllowed;
+ }
+ /** Returns elapsed time (milliseconds). */
+ public long getTimeElapsed() {
+ return timeElapsed;
+ }
+ /** Returns last doc that was collected when the search time exceeded. */
+ public int getLastDocCollected() {
+ return lastDocCollected;
+ }
+ }
+
+ // Declare and initialize a single static timer thread to be used by
+ // all TimeLimitedCollector instances. The JVM assures that
+ // this only happens once.
+ private final static TimerThread TIMER_THREAD = new TimerThread();
+
+ static {
+ TIMER_THREAD.start();
+ }
+
+ private final long t0;
+ private final long timeout;
+ private final Collector collector;
+
+ /**
+   * Create a TimeLimitingCollector wrapper over another {@link Collector} with a specified timeout.
+ * @param collector the wrapped {@link Collector}
+ * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown
+ */
+  public TimeLimitingCollector(final Collector collector, final long timeAllowed) {
+ this.collector = collector;
+ t0 = TIMER_THREAD.getMilliseconds();
+ this.timeout = t0 + timeAllowed;
+ }
+
+ /**
+ * Return the timer resolution.
+ * @see #setResolution(long)
+ */
+ public static long getResolution() {
+ return resolution;
+ }
+
+ /**
+ * Set the timer resolution.
+ * The default timer resolution is 20 milliseconds.
+ * This means that a search required to take no longer than
+ * 800 milliseconds may be stopped after 780 to 820 milliseconds.
+ *
Note that:
+ *
The TimeLimitedCollector is used to timeout search requests that - * take longer than the maximum allowed search time limit. After this - * time is exceeded, the search thread is stopped by throwing a - * TimeExceeded Exception.
+ *+ * The TimeLimitedCollector is used to timeout search requests that take longer + * than the maximum allowed search time limit. After this time is exceeded, the + * search thread is stopped by throwing a TimeExceeded Exception. + *
* + * @deprecated this class will be removed in 3.0. Use + * {@link TimeLimitingCollector} instead, which extends the new + * {@link Collector}. */ -public class TimeLimitedCollector extends MultiReaderHitCollector { +public class TimeLimitedCollector extends HitCollector { /** * Default timer resolution. @@ -136,19 +136,15 @@ private final long t0; private final long timeout; - private final MultiReaderHitCollector hc; + private final HitCollector hc; /** * Create a TimeLimitedCollector wrapper over another HitCollector with a specified timeout. * @param hc the wrapped HitCollector * @param timeAllowed max time allowed for collecting hits after which {@link TimeExceededException} is thrown */ - public TimeLimitedCollector( final HitCollector hc, final long timeAllowed ) { - if (hc instanceof MultiReaderHitCollector) { - this.hc = (MultiReaderHitCollector) hc; - } else { - this.hc = new IndexSearcher.MultiReaderCollectorWrapper(hc); - } + public TimeLimitedCollector(final HitCollector hc, final long timeAllowed) { + this.hc = hc; t0 = TIMER_THREAD.getMilliseconds(); this.timeout = t0 + timeAllowed; } @@ -219,7 +215,4 @@ this.greedy = greedy; } - public void setNextReader(IndexReader reader, int base) throws IOException { - hc.setNextReader(reader, base); - } } Index: src/java/org/apache/lucene/search/TopFieldCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopFieldCollector.java (revision 762823) +++ src/java/org/apache/lucene/search/TopFieldCollector.java (working copy) @@ -21,201 +21,320 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldValueHitQueue.Entry; +import org.apache.lucene.util.PriorityQueue; /** - * A {@link HitCollector} that sorts by {@link SortField} using + * A {@link Collector} that sorts by {@link SortField} using * {@link FieldComparator}s. - * - * NOTE: This API is experimental and might change in - * incompatible ways in the next release. + * + *NOTE: This API is experimental and might change in + * incompatible ways in the next release.
 */
-public final class TopFieldCollector extends MultiReaderHitCollector {
+public abstract class TopFieldCollector extends TopDocsCollector {
+
+  // TODO: one optimization we could do is to pre-fill
+  // the queue with sentinel value that guaranteed to
+  // always compare lower than a real hit; this would
+  // save having to check queueFull on each insert

-  private final FieldValueHitQueue queue;
+  /*
+   * Implements a TopFieldCollector over one SortField criteria, without
+   * tracking document scores.
+   */
+  private static class OneComparatorNonScoringCollector extends TopFieldCollector {

-  private final FieldComparator[] comparators;
-  private FieldComparator comparator1;
-  private final int numComparators;
-  private int[] reverseMul;
-  private int reverseMul1 = 0;
-
-  private final int numHits;
-  private int totalHits;
-  private FieldValueHitQueue.Entry bottom = null;
-
-  /** Stores the maximum score value encountered, needed for normalizing. */
-  private float maxScore = Float.NEGATIVE_INFINITY;
-
-  private boolean queueFull;
-
-  private boolean fillFields;
-
-  public TopFieldCollector(Sort sort, int numHits, IndexReader[] subReaders, boolean fillFields)
-    throws IOException {
-
-    if (sort.fields.length == 0) {
-      throw new IllegalArgumentException("Sort must contain at least one field");
+    final private FieldComparator comparator;
+    final private int reverseMul;
+
+    public OneComparatorNonScoringCollector(FieldValueHitQueue queue,
+        int numHits, boolean fillFields) throws IOException {
+      super(queue, numHits, fillFields);
+      comparator = queue.getComparators()[0];
+      reverseMul = queue.getReverseMul()[0];
    }
-
-    queue = new FieldValueHitQueue(sort.fields, numHits, subReaders);
-    comparators = queue.getComparators();
-    reverseMul = queue.getReverseMul();
-    numComparators = comparators.length;
-
-    if (numComparators == 1) {
-      comparator1 = comparators[0];
-      reverseMul1 = reverseMul[0];
-    } else {
-      comparator1 = null;
-      reverseMul1 = 0;
+
+    public void collect(int doc) throws IOException {
+      ++totalHits;
+      if (queueFull) {
+        // Fastmatch: return if this hit is not competitive
+        final int cmp = reverseMul * comparator.compareBottom(doc);
+        if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.docID)) {
+          return;
+        }
+
+        // This hit is competitive - replace bottom element in queue & adjustTop
+        comparator.copy(bottom.slot, doc);
+        updateBottom(doc, docScore);
+        comparator.setBottom(bottom.slot);
+      } else {
+        // Startup transient: queue hasn't gathered numHits yet
+        final int slot = totalHits - 1;
+        // Copy hit into queue
+        comparator.copy(slot, doc);
+        add(slot, doc, docScore);
+        if (queueFull) {
+          comparator.setBottom(bottom.slot);
+        }
+      }
    }
-    this.numHits = numHits;
-    this.fillFields = fillFields;
+
+    public void setNextReader(IndexReader reader, int docBase) throws IOException {
+      final int numSlotsFull = queueFull ? numHits : totalHits;
+      this.docBase = docBase;
+      comparator.setNextReader(reader, docBase, numSlotsFull);
+    }
+
+    public void setScorer(Scorer scorer) throws IOException {
+      comparator.setScorer(scorer);
+    }
+
  }

-  int currentDocBase;
+  /*
+   * Implements a TopFieldCollector over one SortField criteria, with tracking
+   * document scores.
+   */
+  private final static class OneComparatorScoringCollector extends OneComparatorNonScoringCollector {

-  // javadoc inherited
-  public void setNextReader(IndexReader reader, int docBase) throws IOException {
-    final int numSlotsFull;
-    if (queueFull)
-      numSlotsFull = numHits;
-    else
-      numSlotsFull = totalHits;
-
-    currentDocBase = docBase;
-
-    for (int i = 0; i < numComparators; i++) {
-      comparators[i].setNextReader(reader, docBase, numSlotsFull);
+    private Scorer scorer;
+
+    public OneComparatorScoringCollector(FieldValueHitQueue queue,
+        int numHits, boolean fillFields) throws IOException {
+      super(queue, numHits, fillFields);
    }
+
+    public void collect(int doc) throws IOException {
+      docScore = scorer.score();
+      maxScore = Math.max(maxScore, docScore);
+      super.collect(doc);
+    }
+
+    public void setScorer(Scorer scorer) throws IOException {
+      this.scorer = scorer;
+      super.setScorer(scorer);
+    }
  }

-  private final void updateBottom(int doc, float score) {
-    bottom.docID = currentDocBase + doc;
-    bottom.score = score;
-    queue.adjustTop();
-    bottom = (FieldValueHitQueue.Entry) queue.top();
-  }
-
-  private final void add(int slot, int doc, float score) {
-    queue.put(new FieldValueHitQueue.Entry(slot, currentDocBase+doc, score));
-    bottom = (FieldValueHitQueue.Entry) queue.top();
-    queueFull = totalHits == numHits;
-  }
-
-  // javadoc inherited
-  public void collect(int doc, float score) {
-    if (score > 0.0f) {
-
-      maxScore = Math.max(maxScore, score);
-      totalHits++;
-
-      // TODO: one optimization we could do is to pre-fill
-      // the queue with sentinel value that guaranteed to
-      // always compare lower than a real hit; this would
-      // save having to check queueFull on each insert
-
+  /*
+   * Implements a TopFieldCollector over multiple SortField criteria, without
+   * tracking document scores.
+   */
+  private static class MultiComparatorNonScoringCollector extends TopFieldCollector {
+
+    final private FieldComparator[] comparators;
+    final private int[] reverseMul;
+
+    public MultiComparatorNonScoringCollector(FieldValueHitQueue queue,
+        int numHits, boolean fillFields) throws IOException {
+      super(queue, numHits, fillFields);
+      comparators = queue.getComparators();
+      reverseMul = queue.getReverseMul();
+    }
+
+    public void collect(int doc) throws IOException {
+      ++totalHits;
      if (queueFull) {
-
-        if (numComparators == 1) {
-          // Common case
-
-          // Fastmatch: return if this hit is not competitive
-          final int cmp = reverseMul1 * comparator1.compareBottom(doc, score);
-          if (cmp < 0) {
+        // Fastmatch: return if this hit is not competitive
+        for (int i = 0;; i++) {
+          final int c = reverseMul[i] * comparators[i].compareBottom(doc);
+          if (c < 0) {
            // Definitely not competitive
            return;
-          } else if (cmp == 0 && doc + currentDocBase > bottom.docID) {
-            // Definitely not competitive
-            return;
-          }
-
-          // This hit is competitive -- replace bottom
-          // element in queue & adjustTop
-          comparator1.copy(bottom.slot, doc, score);
-
-          updateBottom(doc, score);
-
-          comparator1.setBottom(bottom.slot);
-
-        } else {
-
-          // Fastmatch: return if this hit is not competitive
-          for(int i=0;;i++) {
-            final int c = reverseMul[i] * comparators[i].compareBottom(doc, score);
-            if (c < 0) {
+          } else if (c > 0) {
+            // Definitely competitive
+            break;
+          } else if (i == comparators.length - 1) {
+            // This is the equals case.
-            if (doc + currentDocBase > bottom.docID) {
-              // Definitely not competitive
-              return;
-            } else {
-              break;
-            }
-          }
-        }
+            if (doc + docBase > bottom.docID) {
+              // Definitely not competitive
+              return;
+            }
+            break;
+          }
+        }

-        // This hit is competitive -- replace bottom
-        // element in queue & adjustTop
-        for (int i = 0; i < numComparators; i++) {
-          comparators[i].copy(bottom.slot, doc, score);
-        }
+        // This hit is competitive - replace bottom element in queue & adjustTop
+        for (int i = 0; i < comparators.length; i++) {
+          comparators[i].copy(bottom.slot, doc);
+        }

-        updateBottom(doc, score);
+        updateBottom(doc, docScore);

-        for(int i=0;i<numComparators;i++) {
...
Index: src/java/org/apache/lucene/search/TopDocsCollector.java
===================================================================
--- src/java/org/apache/lucene/search/TopDocsCollector.java	(revision 0)
+++ src/java/org/apache/lucene/search/TopDocsCollector.java	(revision 0)
...
+  /**
+   * Returns a {@link TopDocs} instance containing the given results. If
+   * results is null it means there are no results to return,
+ * either because there were 0 calls to collect() or because the arguments to
+ * topDocs were invalid.
+ */
+ protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
+ return results == null ? EMPTY_TOPDOCS : new TopDocs(totalHits, results);
+ }
+
+ /** The total number of documents that matched this query. */
+ public int getTotalHits() {
+ return totalHits;
+ }
+
+ /** Returns the top docs that were collected by this collector. */
+ public final TopDocs topDocs() {
+ return topDocs(0, pq.size());
+ }
+
+ /**
+   * Returns the documents in the range [start .. pq.size()) that were collected
+ * by this collector. Note that if start >= pq.size(), an empty TopDocs is
+   * returned.
+   *
+   * NOTE: you cannot call this method more than once for each search
+   * execution. If you need to call it more than once, passing each time a
+   * different start, you should call {@link #topDocs()} and work
+ * with the returned {@link TopDocs} object, which will contain all the
+ * results this search execution collected.
+ */
+ public final TopDocs topDocs(int start) {
+ return topDocs(start, pq.size());
+ }
+
+ /**
+   * Returns the documents in the range [start .. start+howMany) that were
+ * collected by this collector. Note that if start >= pq.size(), an empty
+ * TopDocs is returned, and if pq.size() - start < howMany, then only the
+   * available documents in [start .. pq.size()) are returned.
...
Index: src/java/org/apache/lucene/search/ParallelMultiSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/ParallelMultiSearcher.java	(revision 762823)
+++ src/java/org/apache/lucene/search/ParallelMultiSearcher.java	(working copy)
...
+  /** Lower-level search API.
+   *
+   * {@link Collector#collect(int)} is called for every matching document.
+   *
+   * Applications should only use this if they need all of the
+ * matching documents. The high-level search API ({@link
+ * Searcher#search(Query)}) is usually more efficient, as it skips
+ * non-high-scoring hits.
+ *
+ * @param weight to match documents
+ * @param filter if non-null, a bitset used to eliminate some documents
+ * @param collector to receive hits
+ *
+ * @todo parallelize this one too
+ */
+ public void search(Weight weight, Filter filter, final Collector collector)
+ throws IOException {
+ for (int i = 0; i < searchables.length; i++) {
- final MultiReaderHitCollector hc;
- if (results instanceof MultiReaderHitCollector) {
- // results can shift
- final MultiReaderHitCollector resultsMulti = (MultiReaderHitCollector) results;
- hc = new MultiReaderHitCollector() {
- public void collect(int doc, float score) {
- resultsMulti.collect(doc, score);
- }
+ final int start = starts[i];
- public void setNextReader(IndexReader reader, int docBase) throws IOException {
- resultsMulti.setNextReader(reader, start+docBase);
- }
- };
- } else {
- // We must shift the docIDs
- hc = new MultiReaderHitCollector() {
- private int docBase;
- public void collect(int doc, float score) {
- results.collect(doc + docBase + start, score);
- }
+ final Collector hc = new Collector() {
+ public void setScorer(Scorer scorer) throws IOException {
+ collector.setScorer(scorer);
+ }
+ public void collect(int doc) throws IOException {
+ collector.collect(doc);
+ }
+
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {
+ collector.setNextReader(reader, start + docBase);
+ }
+ };
+
+ searchables[i].search(weight, filter, hc);
+ }
+ }
- public void setNextReader(IndexReader reader, int docBase) {
- this.docBase = docBase;
- }
- };
- }
-
- searchables[i].search(weight, filter, hc);
- }
- }
-
/*
* TODO: this one could be parallelized too
* @see org.apache.lucene.search.Searchable#rewrite(org.apache.lucene.search.Query)
Index: src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/IndexSearcher.java (revision 762823)
+++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy)
@@ -188,12 +188,16 @@
throws IOException {
return search(weight, filter, nDocs, sort, true);
}
-
- /**
- * Just like {@link #search(Weight, Filter, int, Sort)},
- * but you choose whether or not the fields in the
- * returned {@link FieldDoc} instances should be set by
- * specifying fillFields.
+
+ /**
+ * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose
+ * whether or not the fields in the returned {@link FieldDoc} instances should
+ * be set by specifying fillFields.
+ * NOTE: currently, this method tracks document scores and sets them in
+ * the returned {@link FieldDoc}, however in 3.0 it will move to not track
+ * document scores. If document scores tracking is still needed, you can use
+ * {@link #search(Weight, Filter, Collector)} and pass in a
+ * {@link TopFieldCollector} instance.
*/
public TopFieldDocs search(Weight weight, Filter filter, final int nDocs,
Sort sort, boolean fillFields)
@@ -222,29 +226,32 @@
if (legacy) {
// Search the single top-level reader
- TopScoreDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs);
- collector.setNextReader(reader, 0);
- doSearch(reader, weight, filter, collector);
+ TopDocCollector collector = new TopFieldDocCollector(reader, sort, nDocs);
+ HitCollectorWrapper hcw = new HitCollectorWrapper(collector);
+ hcw.setNextReader(reader, 0);
+ doSearch(reader, weight, filter, hcw);
return (TopFieldDocs) collector.topDocs();
- } else {
- // Search each sub-reader
- TopFieldCollector collector = new TopFieldCollector(sort, nDocs, sortedSubReaders, fillFields);
- search(weight, filter, collector);
- return (TopFieldDocs) collector.topDocs();
}
+ // Search each sub-reader
+ // TODO: by default we should create a TopFieldCollector which does not
+ // track document scores. Currently the default is set to true, however it
+ // will change in 3.0.
+ TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, fillFields, true);
+ search(weight, filter, collector);
+ return (TopFieldDocs) collector.topDocs();
}
// inherit javadoc
+ /** @deprecated use {@link #search(Weight, Filter, Collector)} instead. */
public void search(Weight weight, Filter filter, HitCollector results)
throws IOException {
-
- final MultiReaderHitCollector collector;
- if (results instanceof MultiReaderHitCollector) {
- collector = (MultiReaderHitCollector) results;
- } else {
- collector = new MultiReaderCollectorWrapper(results);
- }
-
+ search(weight, filter, new HitCollectorWrapper(results));
+ }
+
+ // inherit javadoc
+ public void search(Weight weight, Filter filter, Collector collector)
+ throws IOException {
+
for (int i = 0; i < sortedSubReaders.length; i++) { // search each subreader
collector.setNextReader(sortedSubReaders[i], sortedStarts[i]);
doSearch(sortedSubReaders[i], weight, filter, collector);
@@ -252,14 +259,14 @@
}
private void doSearch(IndexReader reader, Weight weight, Filter filter,
- final HitCollector results) throws IOException {
+ final Collector collector) throws IOException {
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return;
if (filter == null) {
- scorer.score(results);
+ scorer.score(collector);
return;
}
@@ -267,6 +274,7 @@
boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc());
+ collector.setScorer(scorer);
while (more) {
int filterDocId = filterDocIdIterator.doc();
if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) {
@@ -274,7 +282,7 @@
} else {
int scorerDocId = scorer.doc();
if (scorerDocId == filterDocId) { // permitted by filter
- results.collect(scorerDocId, scorer.score());
+ collector.collect(scorerDocId);
more = filterDocIdIterator.next();
} else {
more = filterDocIdIterator.skipTo(scorerDocId);
@@ -295,26 +303,4 @@
public Explanation explain(Weight weight, int doc) throws IOException {
return weight.explain(reader, doc);
}
-
- /**
- * Wrapper for non expert ({@link HitCollector})
- * implementations, which simply re-bases the incoming
- * docID before calling {@link HitCollector#collect}.
- */
- static class MultiReaderCollectorWrapper extends MultiReaderHitCollector {
- private HitCollector collector;
- private int base = -1;
-
- public MultiReaderCollectorWrapper(HitCollector collector) {
- this.collector = collector;
- }
-
- public void collect(int doc, float score) {
- collector.collect(doc + base, score);
- }
-
- public void setNextReader(IndexReader reader, int docBase) {
- base = docBase;
- }
- }
}
Index: src/java/org/apache/lucene/search/TermScorer.java
===================================================================
--- src/java/org/apache/lucene/search/TermScorer.java (revision 762823)
+++ src/java/org/apache/lucene/search/TermScorer.java (working copy)
@@ -24,6 +24,9 @@
/** Expert: A Scorer for documents matching a Term.
*/
final class TermScorer extends Scorer {
+
+ private static final float[] SIM_NORM_DECODER = Similarity.getNormDecoder();
+
private Weight weight;
private TermDocs termDocs;
private byte[] norms;
@@ -56,25 +59,26 @@
scoreCache[i] = getSimilarity().tf(i) * weightValue;
}
+ /** @deprecated use {@link #score(Collector)} instead. */
public void score(HitCollector hc) throws IOException {
+ score(new HitCollectorWrapper(hc));
+ }
+
+ public void score(Collector c) throws IOException {
next();
- score(hc, Integer.MAX_VALUE);
+ score(c, Integer.MAX_VALUE);
}
+ /** @deprecated use {@link #score(Collector, int)} instead. */
protected boolean score(HitCollector c, int end) throws IOException {
- Similarity similarity = getSimilarity(); // cache sim in local
- float[] normDecoder = Similarity.getNormDecoder();
+ return score(new HitCollectorWrapper(c), end);
+ }
+
+ protected boolean score(Collector c, int end) throws IOException {
+ c.setScorer(this);
while (doc < end) { // for docs in window
- int f = freqs[pointer];
- float score = // compute tf(f)*weight
- f < SCORE_CACHE_SIZE // check cache
- ? scoreCache[f] // cache hit
- : similarity.tf(f)*weightValue; // cache miss
-
- score *= normDecoder[norms[doc] & 0xFF]; // normalize for field
-
- c.collect(doc, score); // collect score
-
+ c.collect(doc); // collect hit (score is now pulled from the Scorer on demand)
+
if (++pointer >= pointerMax) {
pointerMax = termDocs.read(docs, freqs); // refill buffers
if (pointerMax != 0) {
@@ -123,7 +127,7 @@
? scoreCache[f] // cache hit
: getSimilarity().tf(f)*weightValue; // cache miss
- return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
+ return raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
}
/** Skips to the first match beyond the current whose document number is
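
  The SIM_NORM_DECODER lookup above is equivalent to the
  Similarity.decodeNorm() call it replaces: decodeNorm() consults the same
  256-entry table that getNormDecoder() exposes, indexed by the norm byte
  taken as unsigned. A quick check of that equivalence (a sketch, assuming
  only the public Similarity API):

    import org.apache.lucene.search.Similarity;

    public class NormDecoderCheck {
      public static void main(String[] args) {
        float[] decoder = Similarity.getNormDecoder();
        for (int i = 0; i < 256; i++) {
          byte b = (byte) i;
          // cached table lookup (new TermScorer path) vs. per-call decode (old path)
          if (decoder[b & 0xFF] != Similarity.decodeNorm(b)) {
            throw new AssertionError("mismatch for norm byte " + b);
          }
        }
        System.out.println("all 256 norm bytes decode identically");
      }
    }
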
Index: src/java/org/apache/lucene/search/FieldComparatorSource.java
===================================================================
--- src/java/org/apache/lucene/search/FieldComparatorSource.java (revision 762823)
+++ src/java/org/apache/lucene/search/FieldComparatorSource.java (working copy)
@@ -18,7 +18,6 @@
*/
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
/**
* Provides a {@link FieldComparator} for custom field sorting.
@@ -38,6 +37,6 @@
* @throws IOException
* If an error occurs reading the index.
*/
- public abstract FieldComparator newComparator(String fieldname, IndexReader[] subReaders, int numHits, int sortPos, boolean reversed)
+ public abstract FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed)
throws IOException;
}
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (revision 762802)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java (working copy)
@@ -27,6 +27,7 @@
*/
public class SearchWithSortTask extends ReadTask {
+ private boolean doScore = true;
private Sort sort;
public SearchWithSortTask(PerfRunData runData) {
@@ -34,7 +35,10 @@
}
/**
- * SortFields: field:type,field:type
+ * SortFields: field:type,field:type[,noscore]
+ *
+ * If noscore is present, score tracking is turned off
+ * in TopFieldCollector.
*
* name,byline:int,subject:auto
*
@@ -43,11 +47,15 @@
super.setParams(sortField);
String[] fields = sortField.split(",");
SortField[] sortFields = new SortField[fields.length];
+ int upto = 0;
for (int i = 0; i < fields.length; i++) {
String field = fields[i];
SortField sortField0;
if (field.equals("doc")) {
sortField0 = SortField.FIELD_DOC;
+ } else if (field.equals("noscore")) {
+ doScore = false;
+ continue;
} else {
int index = field.lastIndexOf(":");
String fieldName;
@@ -62,8 +70,14 @@
int type = getType(typeString);
sortField0 = new SortField(fieldName, type);
}
- sortFields[i] = sortField0;
+ sortFields[upto++] = sortField0;
}
+
+ if (upto < sortFields.length) {
+ SortField[] newSortFields = new SortField[upto];
+ System.arraycopy(sortFields, 0, newSortFields, 0, upto);
+ sortFields = newSortFields;
+ }
this.sort = new Sort(sortFields);
}
@@ -107,6 +121,10 @@
return false;
}
+ public boolean withScore() {
+ return doScore;
+ }
+
public Sort getSort() {
if (sort == null) {
throw new IllegalStateException("No sort field was set");
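
  With the parsing above, noscore is consumed as a pseudo-field and the
  sortFields array is trimmed before the Sort is built. A hypothetical
  benchmark algorithm line exercising the flag might look like this (field
  names invented for illustration):

    SearchWithSort(byline:int,subject:auto,noscore)
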
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (revision 762802)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java (working copy)
@@ -32,6 +32,7 @@
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -62,7 +63,6 @@
public ReadTask(PerfRunData runData) {
super(runData);
}
-
public int doLogic() throws Exception {
int res = 0;
boolean closeReader = false;
@@ -102,7 +102,13 @@
final int numHits = numHits();
if (numHits > 0) {
if (sort != null) {
- hits = searcher.search(q, null, numHits, sort);
+ if (withScore()) {
+ hits = searcher.search(q, null, numHits, sort);
+ } else {
+ TopFieldCollector collector = TopFieldCollector.create(sort, numHits, true /* fillFields */, false /* trackDocScores */);
+ searcher.search(q, collector);
+ hits = collector.topDocs();
+ }
} else {
hits = searcher.search(q, numHits);
}
@@ -180,6 +186,12 @@
*/
public abstract boolean withTraverse();
+ /** Whether scores should be computed (only useful with
+ * field sort). */
+ public boolean withScore() {
+ return true;
+ }
+
/**
* Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
* of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
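
  Tasks that always want the cheaper no-score path can override the new hook
  instead of passing noscore each time; a minimal sketch (class name
  hypothetical):

    import org.apache.lucene.benchmark.byTask.PerfRunData;
    import org.apache.lucene.benchmark.byTask.tasks.SearchWithSortTask;

    public class NoScoreSortTask extends SearchWithSortTask {
      public NoScoreSortTask(PerfRunData runData) {
        super(runData);
      }

      // ReadTask consults this to decide whether to track scores in
      // TopFieldCollector (see doLogic() above).
      public boolean withScore() {
        return false;
      }
    }
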
Index: contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java
===================================================================
--- contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java (revision 762802)
+++ contrib/spatial/src/java/org/apache/lucene/spatial/tier/DistanceFieldComparatorSource.java (working copy)
@@ -21,7 +21,6 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;
@@ -49,8 +48,7 @@
}
@Override
- public FieldComparator newComparator(String fieldname,
- IndexReader[] subReaders, int numHits, int sortPos, boolean reversed)
+ public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed)
throws IOException {
dsdlc = new DistanceScoreDocLookupComparator(distanceFilter, numHits);
return dsdlc;
@@ -87,7 +85,7 @@
}
@Override
- public int compareBottom(int doc, float score) {
+ public int compareBottom(int doc) {
final double v2 = distanceFilter.getDistance(doc);
if (bottom > v2) {
return 1;
@@ -98,7 +96,7 @@
}
@Override
- public void copy(int slot, int doc, float score) {
+ public void copy(int slot, int doc) {
values[slot] = distanceFilter.getDistance(doc);
}
Index: contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java
===================================================================
--- contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (revision 762802)
+++ contrib/miscellaneous/src/test/org/apache/lucene/index/TestFieldNormModifier.java (working copy)
@@ -22,16 +22,18 @@
import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiReaderHitCollector;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
/**
* Tests changing of field norms with a custom similarity and with fake norms.
@@ -52,12 +54,12 @@
/** inverts the normal notion of lengthNorm */
public static Similarity s = new DefaultSimilarity() {
public float lengthNorm(String fieldName, int numTokens) {
- return (float)numTokens;
+ return numTokens;
}
};
public void setUp() throws Exception {
- IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
+ IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, MaxFieldLength.UNLIMITED);
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
@@ -123,14 +125,19 @@
float lastScore = 0.0f;
// default similarity should put docs with shorter length first
- searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() {
- private int docBase = -1;
- public final void collect(int doc, float score) {
- scores[doc + docBase] = score;
+ searcher.search(new TermQuery(new Term("field", "word")), new Collector() {
+ private int docBase = 0;
+ private Scorer scorer;
+
+ public final void collect(int doc) throws IOException {
+ scores[doc + docBase] = scorer.score();
}
public void setNextReader(IndexReader reader, int docBase) {
this.docBase = docBase;
}
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
});
searcher.close();
@@ -147,14 +154,18 @@
// new norm (with default similarity) should put longer docs first
searcher = new IndexSearcher(store);
- searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() {
- private int docBase = -1;
- public final void collect(int doc, float score) {
- scores[doc + docBase] = score;
+ searcher.search(new TermQuery(new Term("field", "word")), new Collector() {
+ private int docBase = 0;
+ private Scorer scorer;
+ public final void collect(int doc) throws IOException {
+ scores[doc + docBase] = scorer.score();
}
public void setNextReader(IndexReader reader, int docBase) {
this.docBase = docBase;
}
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
});
searcher.close();
@@ -188,15 +199,18 @@
float lastScore = 0.0f;
// default similarity should return the same score for all documents for this query
- searcher.search(new TermQuery(new Term("untokfield", "20061212")), new MultiReaderHitCollector() {
- private int docBase = -1;
- private int lastMax;
- public final void collect(int doc, float score) {
- scores[doc + docBase] = score;
+ searcher.search(new TermQuery(new Term("untokfield", "20061212")), new Collector() {
+ private int docBase = 0;
+ private Scorer scorer;
+ public final void collect(int doc) throws IOException {
+ scores[doc + docBase] = scorer.score();
}
public void setNextReader(IndexReader reader, int docBase) {
this.docBase = docBase;
}
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
});
searcher.close();
Index: contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java
===================================================================
--- contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (revision 762802)
+++ contrib/miscellaneous/src/test/org/apache/lucene/misc/TestLengthNormModifier.java (working copy)
@@ -17,21 +17,26 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.FieldNormModifier;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MultiReaderHitCollector;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
/**
* Tests changing the norms after changing the similarity
@@ -52,13 +57,12 @@
/** inverts the normal notion of lengthNorm */
public static Similarity s = new DefaultSimilarity() {
public float lengthNorm(String fieldName, int numTokens) {
- return (float)numTokens;
+ return numTokens;
}
};
public void setUp() throws Exception {
- IndexWriter writer = new
- IndexWriter(store, new SimpleAnalyzer(), true);
+ IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, MaxFieldLength.UNLIMITED);
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
@@ -79,9 +83,9 @@
}
public void testMissingField() {
- LengthNormModifier lnm = new LengthNormModifier(store, s);
+ FieldNormModifier fnm = new FieldNormModifier(store, s);
try {
- lnm.reSetNorms("nobodyherebutuschickens");
+ fnm.reSetNorms("nobodyherebutuschickens");
} catch (Exception e) {
assertNull("caught something", e);
}
@@ -100,9 +104,9 @@
r.close();
- LengthNormModifier lnm = new LengthNormModifier(store, s);
+ FieldNormModifier fnm = new FieldNormModifier(store, s);
try {
- lnm.reSetNorms("nonorm");
+ fnm.reSetNorms("nonorm");
} catch (Exception e) {
assertNull("caught something", e);
}
@@ -129,14 +133,18 @@
// default similarity should put docs with shorter length first
searcher = new IndexSearcher(store);
- searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() {
- private int docBase = -1;
- public final void collect(int doc, float score) {
- scores[doc + docBase] = score;
+ searcher.search(new TermQuery(new Term("field", "word")), new Collector() {
+ private int docBase = 0;
+ private Scorer scorer;
+ public final void collect(int doc) throws IOException {
+ scores[doc + docBase] = scorer.score();
}
public void setNextReader(IndexReader reader, int docBase) {
this.docBase = docBase;
}
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
});
searcher.close();
@@ -151,22 +159,26 @@
// override the norms to be inverted
Similarity s = new DefaultSimilarity() {
public float lengthNorm(String fieldName, int numTokens) {
- return (float)numTokens;
+ return numTokens;
}
};
- LengthNormModifier lnm = new LengthNormModifier(store, s);
- lnm.reSetNorms("field");
+ FieldNormModifier fnm = new FieldNormModifier(store, s);
+ fnm.reSetNorms("field");
// new norm (with default similarity) should put longer docs first
searcher = new IndexSearcher(store);
- searcher.search(new TermQuery(new Term("field", "word")), new MultiReaderHitCollector() {
- private int docBase = -1;
- public final void collect(int doc, float score) {
- scores[doc + docBase] = score;
+ searcher.search(new TermQuery(new Term("field", "word")), new Collector() {
+ private int docBase = 0;
+ private Scorer scorer;
+ public final void collect(int doc) throws IOException {
+ scores[doc + docBase] = scorer.score();
}
public void setNextReader(IndexReader reader, int docBase) {
this.docBase = docBase;
}
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ }
});
searcher.close();