Index: src/java/org/apache/lucene/search/TopScoreDocCollector.java
===================================================================
--- src/java/org/apache/lucene/search/TopScoreDocCollector.java (revision 912409)
+++ src/java/org/apache/lucene/search/TopScoreDocCollector.java (working copy)
@@ -28,71 +28,119 @@
* and then (when the scores are tied) docID ascending. When you create an
* instance of this collector you should know in advance whether documents are
* going to be collected in doc Id order or not.
- *
- *
NOTE: The values {@link Float#NaN} and
- * {Float#NEGATIVE_INFINITY} are not valid scores. This
- * collector will not properly collect hits with such
- * scores.
*/
public abstract class TopScoreDocCollector extends TopDocsCollector {
+ private static abstract class Collector {
+ protected ScoreDoc pqTop, pqReuse = new ScoreDoc(0, 0.0f);
+
+ public Collector(ScoreDoc pqTop) {
+ this.pqTop = pqTop;
+ }
+
+ public abstract void collect(int doc) throws IOException;
+ }
+
+ // this impl of the above class is used when numHits==0
+ private final class NullCollector extends Collector {
+ public NullCollector() {
+ super(null);
+ }
+
+ @Override
+ public void collect(final int doc) {
+ totalHits++;
+ }
+ }
+
// Assumes docs are scored in order.
- private static class InOrderTopScoreDocCollector extends TopScoreDocCollector {
- private InOrderTopScoreDocCollector(int numHits) {
+ private static final class InOrderTopScoreDocCollector extends TopScoreDocCollector {
+ private InOrderTopScoreDocCollector(final int numHits) {
super(numHits);
+ collector = (numHits == 0) ? new NullCollector() : new Collector(null) {
+ @Override
+ public void collect(final int doc) throws IOException {
+ totalHits++;
+ pq.add(new ScoreDoc(doc + docBase, scorer.score()));
+
+ if (totalHits == numHits) {
+ // exchange the collector impl
+ collector = new Collector(pq.top()) {
+
+ @Override
+ public void collect(final int doc) throws IOException {
+ final float score = scorer.score();
+ totalHits++;
+ if (score <= pqTop.score) {
+ // Since docs are returned in-order (i.e., increasing doc Id), a document
+ // with equal score to pqTop.score cannot compete since HitQueue favors
+ // documents with lower doc Ids. Therefore reject those docs too.
+ return;
+ }
+ pqReuse.doc = doc + docBase;
+ pqReuse.score = score;
+ pqReuse = pq.insertWithOverflow(pqReuse);
+ pqTop = pq.top();
+ }
+
+ };
+ }
+ }
+ };
}
@Override
- public void collect(int doc) throws IOException {
- float score = scorer.score();
-
- // This collector cannot handle these scores:
- assert score != Float.NEGATIVE_INFINITY;
- assert !Float.isNaN(score);
-
- totalHits++;
- if (score <= pqTop.score) {
- // Since docs are returned in-order (i.e., increasing doc Id), a document
- // with equal score to pqTop.score cannot compete since HitQueue favors
- // documents with lower doc Ids. Therefore reject those docs too.
- return;
- }
- pqTop.doc = doc + docBase;
- pqTop.score = score;
- pqTop = pq.updateTop();
+ public final void collect(int doc) throws IOException {
+ collector.collect(doc);
}
@Override
- public boolean acceptsDocsOutOfOrder() {
+ public final boolean acceptsDocsOutOfOrder() {
return false;
}
}
// Assumes docs are scored out of order.
- private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector {
- private OutOfOrderTopScoreDocCollector(int numHits) {
+ private static final class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector {
+ private OutOfOrderTopScoreDocCollector(final int numHits) {
super(numHits);
+ collector = (numHits == 0) ? new NullCollector() : new Collector(null) {
+ @Override
+ public void collect(final int doc) throws IOException {
+ totalHits++;
+ pq.add(new ScoreDoc(doc + docBase, scorer.score()));
+
+ if (totalHits == numHits) {
+ // exchange the collector impl
+ collector = new Collector(pq.top()) {
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final float score = scorer.score();
+ totalHits++;
+ doc += docBase;
+ if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) {
+ return;
+ }
+ pqReuse.doc = doc;
+ pqReuse.score = score;
+ pqReuse = pq.insertWithOverflow(pqReuse);
+ pqTop = pq.top();
+ }
+
+ };
+ }
+ }
+ };
}
@Override
- public void collect(int doc) throws IOException {
- float score = scorer.score();
-
- // This collector cannot handle NaN
- assert !Float.isNaN(score);
-
- totalHits++;
- doc += docBase;
- if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) {
- return;
- }
- pqTop.doc = doc;
- pqTop.score = score;
- pqTop = pq.updateTop();
+ public final void collect(int doc) throws IOException {
+ collector.collect(doc);
}
@Override
- public boolean acceptsDocsOutOfOrder() {
+ public final boolean acceptsDocsOutOfOrder() {
return true;
}
}
@@ -108,25 +156,20 @@
* objects.
*/
public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) {
-
if (docsScoredInOrder) {
return new InOrderTopScoreDocCollector(numHits);
} else {
return new OutOfOrderTopScoreDocCollector(numHits);
}
-
}
- ScoreDoc pqTop;
+ Collector collector;
int docBase = 0;
Scorer scorer;
// prevents instantiation
private TopScoreDocCollector(int numHits) {
- super(new HitQueue(numHits, true));
- // HitQueue implements getSentinelObject to return a ScoreDoc, so we know
- // that at this point top() is already initialized.
- pqTop = pq.top();
+ super(new HitQueue(numHits, false));
}
@Override
Index: src/test/org/apache/lucene/search/TestTopScoreDocCollector.java
===================================================================
--- src/test/org/apache/lucene/search/TestTopScoreDocCollector.java (revision 912409)
+++ src/test/org/apache/lucene/search/TestTopScoreDocCollector.java (working copy)
@@ -18,12 +18,15 @@
*/
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.search.function.CustomScoreQuery;
+import org.apache.lucene.search.function.CustomScoreProvider;
public class TestTopScoreDocCollector extends LuceneTestCase {
@@ -35,7 +38,6 @@
}
public void testOutOfOrderCollection() throws Exception {
-
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED);
for (int i = 0; i < 10; i++) {
@@ -50,13 +52,6 @@
"InOrderTopScoreDocCollector"
};
- BooleanQuery bq = new BooleanQuery();
- // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
- // which delegates to BS if there are no mandatory clauses.
- bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
- // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
- // the clause instead of BQ.
- bq.setMinimumNumberShouldMatch(1);
IndexSearcher searcher = new IndexSearcher(dir, true);
for (int i = 0; i < inOrder.length; i++) {
TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]);
@@ -66,10 +61,96 @@
ScoreDoc[] sd = tdc.topDocs().scoreDocs;
assertEquals(3, sd.length);
+ assertEquals(10, tdc.getTotalHits());
for (int j = 0; j < sd.length; j++) {
assertEquals("expected doc Id " + j + " found " + sd[j].doc, j, sd[j].doc);
}
}
+
+ for (int i = 0; i < inOrder.length; i++) {
+ TopDocsCollector tdc = TopScoreDocCollector.create(0, inOrder[i]);
+ assertEquals("org.apache.lucene.search.TopScoreDocCollector$" + actualTSDCClass[i], tdc.getClass().getName());
+
+ searcher.search(new MatchAllDocsQuery(), tdc);
+
+ ScoreDoc[] sd = tdc.topDocs().scoreDocs;
+ assertEquals(0, sd.length);
+ assertEquals(10, tdc.getTotalHits());
+ for (int j = 0; j < sd.length; j++) {
+ assertEquals("expected doc Id " + j + " found " + sd[j].doc, j, sd[j].doc);
+ }
+ }
}
+ public void testSpecialValues() throws Exception {
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED);
+ for (int i = 0; i < 50; i++) {
+ writer.addDocument(new Document());
+ }
+ writer.commit();
+ writer.close();
+
+ boolean[] inOrder = new boolean[] { false, true };
+
+ IndexSearcher searcher = new IndexSearcher(dir, true);
+ for (int i = 0; i < inOrder.length; i++) {
+ // use a larger numDocs value to still have sentinels after query in queue!!!
+
+ // fist check all values for validity, incl NaN
+ TopDocsCollector tdc = TopScoreDocCollector.create(100, inOrder[i]);
+ searcher.search(new CustomScoreQuery(new MatchAllDocsQuery()) {
+ public CustomScoreProvider getCustomScoreProvider(IndexReader reader) {
+ return new CustomScoreProvider(reader) {
+ @Override
+ public float customScore(int doc, float subQueryScore, float valSrcScore) {
+ switch (doc % 4) {
+ case 1: return Float.NEGATIVE_INFINITY;
+ case 2: return Float.POSITIVE_INFINITY;
+ case 3: return Float.NaN;
+ default: return 1.0f;
+ }
+ }
+ };
+ }
+ }, tdc);
+
+ ScoreDoc[] sd = tdc.topDocs().scoreDocs;
+ assertEquals(50, sd.length);
+ assertEquals(50, tdc.getTotalHits());
+ for (int j = 0; j < sd.length; j++) {
+ assertTrue("Invalid docid="+sd[j].doc+" for score="+sd[j].score, sd[j].doc >=0 && sd[j].doc != Integer.MAX_VALUE);
+ }
+
+ // then only test infinity values and compare the scored docs (if decreasing)
+ tdc = TopScoreDocCollector.create(100, inOrder[i]);
+ searcher.search(new CustomScoreQuery(new MatchAllDocsQuery()) {
+ @Override
+ public CustomScoreProvider getCustomScoreProvider(IndexReader reader) {
+ return new CustomScoreProvider(reader) {
+ @Override
+ public float customScore(int doc, float subQueryScore, float valSrcScore) {
+ switch (doc % 3) {
+ case 1: return Float.NEGATIVE_INFINITY;
+ case 2: return Float.POSITIVE_INFINITY;
+ default: return (float) (doc - 25);
+ }
+ }
+ };
+ }
+ }, tdc);
+
+ sd = tdc.topDocs().scoreDocs;
+ assertEquals(50, sd.length);
+ assertEquals(50, tdc.getTotalHits());
+ for (int j = 1; j < sd.length; j++) {
+ //System.out.println("inOrder="+inOrder[i]+" doc="+sd[j].doc+" score="+sd[j].score);
+ assertTrue("Score must be decreasing", sd[j-1].score >= sd[j].score);
+ if (sd[j-1].score == sd[j].score) {
+ assertTrue("docid must be increasing when score unchanged", sd[j-1].doc < sd[j].doc);
+ }
+ }
+ }
+ }
+
}