Index: src/java/org/apache/lucene/search/TopScoreDocCollector.java =================================================================== --- src/java/org/apache/lucene/search/TopScoreDocCollector.java (revision 912409) +++ src/java/org/apache/lucene/search/TopScoreDocCollector.java (working copy) @@ -28,71 +28,119 @@ * and then (when the scores are tied) docID ascending. When you create an * instance of this collector you should know in advance whether documents are * going to be collected in doc Id order or not. - * - *

NOTE: The values {@link Float#NaN} and - * {Float#NEGATIVE_INFINITY} are not valid scores. This - * collector will not properly collect hits with such - * scores. */ public abstract class TopScoreDocCollector extends TopDocsCollector { + private static abstract class Collector { + protected ScoreDoc pqTop, pqReuse = new ScoreDoc(0, 0.0f); + + public Collector(ScoreDoc pqTop) { + this.pqTop = pqTop; + } + + public abstract void collect(int doc) throws IOException; + } + + // this impl of the above class is used when numHits==0 + private final class NullCollector extends Collector { + public NullCollector() { + super(null); + } + + @Override + public void collect(final int doc) { + totalHits++; + } + } + // Assumes docs are scored in order. - private static class InOrderTopScoreDocCollector extends TopScoreDocCollector { - private InOrderTopScoreDocCollector(int numHits) { + private static final class InOrderTopScoreDocCollector extends TopScoreDocCollector { + private InOrderTopScoreDocCollector(final int numHits) { super(numHits); + collector = (numHits == 0) ? new NullCollector() : new Collector(null) { + @Override + public void collect(final int doc) throws IOException { + totalHits++; + pq.add(new ScoreDoc(doc + docBase, scorer.score())); + + if (totalHits == numHits) { + // exchange the collector impl + collector = new Collector(pq.top()) { + + @Override + public void collect(final int doc) throws IOException { + final float score = scorer.score(); + totalHits++; + if (score <= pqTop.score) { + // Since docs are returned in-order (i.e., increasing doc Id), a document + // with equal score to pqTop.score cannot compete since HitQueue favors + // documents with lower doc Ids. Therefore reject those docs too. + return; + } + pqReuse.doc = doc + docBase; + pqReuse.score = score; + pqReuse = pq.insertWithOverflow(pqReuse); + pqTop = pq.top(); + } + + }; + } + } + }; } @Override - public void collect(int doc) throws IOException { - float score = scorer.score(); - - // This collector cannot handle these scores: - assert score != Float.NEGATIVE_INFINITY; - assert !Float.isNaN(score); - - totalHits++; - if (score <= pqTop.score) { - // Since docs are returned in-order (i.e., increasing doc Id), a document - // with equal score to pqTop.score cannot compete since HitQueue favors - // documents with lower doc Ids. Therefore reject those docs too. - return; - } - pqTop.doc = doc + docBase; - pqTop.score = score; - pqTop = pq.updateTop(); + public final void collect(int doc) throws IOException { + collector.collect(doc); } @Override - public boolean acceptsDocsOutOfOrder() { + public final boolean acceptsDocsOutOfOrder() { return false; } } // Assumes docs are scored out of order. - private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector { - private OutOfOrderTopScoreDocCollector(int numHits) { + private static final class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector { + private OutOfOrderTopScoreDocCollector(final int numHits) { super(numHits); + collector = (numHits == 0) ? new NullCollector() : new Collector(null) { + @Override + public void collect(final int doc) throws IOException { + totalHits++; + pq.add(new ScoreDoc(doc + docBase, scorer.score())); + + if (totalHits == numHits) { + // exchange the collector impl + collector = new Collector(pq.top()) { + + @Override + public void collect(int doc) throws IOException { + final float score = scorer.score(); + totalHits++; + doc += docBase; + if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) { + return; + } + pqReuse.doc = doc; + pqReuse.score = score; + pqReuse = pq.insertWithOverflow(pqReuse); + pqTop = pq.top(); + } + + }; + } + } + }; } @Override - public void collect(int doc) throws IOException { - float score = scorer.score(); - - // This collector cannot handle NaN - assert !Float.isNaN(score); - - totalHits++; - doc += docBase; - if (score < pqTop.score || (score == pqTop.score && doc > pqTop.doc)) { - return; - } - pqTop.doc = doc; - pqTop.score = score; - pqTop = pq.updateTop(); + public final void collect(int doc) throws IOException { + collector.collect(doc); } @Override - public boolean acceptsDocsOutOfOrder() { + public final boolean acceptsDocsOutOfOrder() { return true; } } @@ -108,25 +156,20 @@ * objects. */ public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) { - if (docsScoredInOrder) { return new InOrderTopScoreDocCollector(numHits); } else { return new OutOfOrderTopScoreDocCollector(numHits); } - } - ScoreDoc pqTop; + Collector collector; int docBase = 0; Scorer scorer; // prevents instantiation private TopScoreDocCollector(int numHits) { - super(new HitQueue(numHits, true)); - // HitQueue implements getSentinelObject to return a ScoreDoc, so we know - // that at this point top() is already initialized. - pqTop = pq.top(); + super(new HitQueue(numHits, false)); } @Override Index: src/test/org/apache/lucene/search/TestTopScoreDocCollector.java =================================================================== --- src/test/org/apache/lucene/search/TestTopScoreDocCollector.java (revision 912409) +++ src/test/org/apache/lucene/search/TestTopScoreDocCollector.java (working copy) @@ -18,12 +18,15 @@ */ import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.search.function.CustomScoreQuery; +import org.apache.lucene.search.function.CustomScoreProvider; public class TestTopScoreDocCollector extends LuceneTestCase { @@ -35,7 +38,6 @@ } public void testOutOfOrderCollection() throws Exception { - Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED); for (int i = 0; i < 10; i++) { @@ -50,13 +52,6 @@ "InOrderTopScoreDocCollector" }; - BooleanQuery bq = new BooleanQuery(); - // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2 - // which delegates to BS if there are no mandatory clauses. - bq.add(new MatchAllDocsQuery(), Occur.SHOULD); - // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return - // the clause instead of BQ. - bq.setMinimumNumberShouldMatch(1); IndexSearcher searcher = new IndexSearcher(dir, true); for (int i = 0; i < inOrder.length; i++) { TopDocsCollector tdc = TopScoreDocCollector.create(3, inOrder[i]); @@ -66,10 +61,96 @@ ScoreDoc[] sd = tdc.topDocs().scoreDocs; assertEquals(3, sd.length); + assertEquals(10, tdc.getTotalHits()); for (int j = 0; j < sd.length; j++) { assertEquals("expected doc Id " + j + " found " + sd[j].doc, j, sd[j].doc); } } + + for (int i = 0; i < inOrder.length; i++) { + TopDocsCollector tdc = TopScoreDocCollector.create(0, inOrder[i]); + assertEquals("org.apache.lucene.search.TopScoreDocCollector$" + actualTSDCClass[i], tdc.getClass().getName()); + + searcher.search(new MatchAllDocsQuery(), tdc); + + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + assertEquals(0, sd.length); + assertEquals(10, tdc.getTotalHits()); + for (int j = 0; j < sd.length; j++) { + assertEquals("expected doc Id " + j + " found " + sd[j].doc, j, sd[j].doc); + } + } } + public void testSpecialValues() throws Exception { + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, null, MaxFieldLength.UNLIMITED); + for (int i = 0; i < 50; i++) { + writer.addDocument(new Document()); + } + writer.commit(); + writer.close(); + + boolean[] inOrder = new boolean[] { false, true }; + + IndexSearcher searcher = new IndexSearcher(dir, true); + for (int i = 0; i < inOrder.length; i++) { + // use a larger numDocs value to still have sentinels after query in queue!!! + + // fist check all values for validity, incl NaN + TopDocsCollector tdc = TopScoreDocCollector.create(100, inOrder[i]); + searcher.search(new CustomScoreQuery(new MatchAllDocsQuery()) { + public CustomScoreProvider getCustomScoreProvider(IndexReader reader) { + return new CustomScoreProvider(reader) { + @Override + public float customScore(int doc, float subQueryScore, float valSrcScore) { + switch (doc % 4) { + case 1: return Float.NEGATIVE_INFINITY; + case 2: return Float.POSITIVE_INFINITY; + case 3: return Float.NaN; + default: return 1.0f; + } + } + }; + } + }, tdc); + + ScoreDoc[] sd = tdc.topDocs().scoreDocs; + assertEquals(50, sd.length); + assertEquals(50, tdc.getTotalHits()); + for (int j = 0; j < sd.length; j++) { + assertTrue("Invalid docid="+sd[j].doc+" for score="+sd[j].score, sd[j].doc >=0 && sd[j].doc != Integer.MAX_VALUE); + } + + // then only test infinity values and compare the scored docs (if decreasing) + tdc = TopScoreDocCollector.create(100, inOrder[i]); + searcher.search(new CustomScoreQuery(new MatchAllDocsQuery()) { + @Override + public CustomScoreProvider getCustomScoreProvider(IndexReader reader) { + return new CustomScoreProvider(reader) { + @Override + public float customScore(int doc, float subQueryScore, float valSrcScore) { + switch (doc % 3) { + case 1: return Float.NEGATIVE_INFINITY; + case 2: return Float.POSITIVE_INFINITY; + default: return (float) (doc - 25); + } + } + }; + } + }, tdc); + + sd = tdc.topDocs().scoreDocs; + assertEquals(50, sd.length); + assertEquals(50, tdc.getTotalHits()); + for (int j = 1; j < sd.length; j++) { + //System.out.println("inOrder="+inOrder[i]+" doc="+sd[j].doc+" score="+sd[j].score); + assertTrue("Score must be decreasing", sd[j-1].score >= sd[j].score); + if (sd[j-1].score == sd[j].score) { + assertTrue("docid must be increasing when score unchanged", sd[j-1].doc < sd[j].doc); + } + } + } + } + }