Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1171976) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (working copy) @@ -1034,7 +1034,7 @@ } public TopDocs search(Weight weight, int topN) throws IOException { - return search(ctx, weight, null, topN); + return search(ctx, weight, null, null, topN); } @Override Index: lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java (revision 1171976) +++ lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java (working copy) @@ -48,7 +48,7 @@ } public TopDocs search(Weight weight, int topN) throws IOException { - return search(ctx, weight, null, topN); + return search(ctx, weight, null, null, topN); } @Override Index: lucene/src/test/org/apache/lucene/search/TestSearchAfter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSearchAfter.java (revision 0) +++ lucene/src/test/org/apache/lucene/search/TestSearchAfter.java (revision 0) @@ -0,0 +1,99 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.English; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +/** + * Tests IndexSearcher's searchAfter() method + */ +public class TestSearchAfter extends LuceneTestCase { + private Directory dir; + private IndexReader reader; + private IndexSearcher searcher; + + @Override + public void setUp() throws Exception { + super.setUp(); + dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir); + int numDocs = atLeast(200); + for (int i = 0; i < numDocs; i++) { + Document document = new Document(); + document.add(newField("english", English.intToEnglish(i), StringField.TYPE_UNSTORED)); + document.add(newField("oddeven", (i % 2 == 0) ? 
"even" : "odd", StringField.TYPE_UNSTORED)); + iw.addDocument(document); + } + reader = iw.getReader(); + iw.close(); + searcher = newSearcher(reader); + } + + @Override + public void tearDown() throws Exception { + searcher.close(); + reader.close(); + dir.close(); + super.tearDown(); + } + + public void testQueries() throws Exception { + Filter odd = new QueryWrapperFilter(new TermQuery(new Term("oddeven", "odd"))); + assertQuery(new MatchAllDocsQuery(), null); + assertQuery(new TermQuery(new Term("english", "one")), null); + assertQuery(new MatchAllDocsQuery(), odd); + assertQuery(new TermQuery(new Term("english", "four")), odd); + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("english", "one")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("oddeven", "even")), BooleanClause.Occur.SHOULD); + assertQuery(bq, null); + } + + void assertQuery(Query query, Filter filter) throws Exception { + TopDocs all = searcher.search(query, filter, searcher.maxDoc()); + int pageSize = _TestUtil.nextInt(random, 1, searcher.maxDoc()*2); + int pageStart = 0; + ScoreDoc lastBottom = null; + while (pageStart < all.totalHits) { + TopDocs paged = searcher.searchAfter(lastBottom, query, filter, pageSize); + if (paged.scoreDocs.length == 0) { + break; + } + assertPage(pageStart, all, paged); + pageStart += paged.scoreDocs.length; + lastBottom = paged.scoreDocs[paged.scoreDocs.length - 1]; + } + assertEquals(all.scoreDocs.length, pageStart); + } + + static void assertPage(int pageStart, TopDocs all, TopDocs paged) { + assertEquals(all.totalHits, paged.totalHits); + for (int i = 0; i < paged.scoreDocs.length; i++) { + assertEquals(all.scoreDocs[pageStart + i].doc, paged.scoreDocs[i].doc); + assertEquals(all.scoreDocs[pageStart + i].score, paged.scoreDocs[i].score, 0f); + } + } +} Index: lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java =================================================================== --- 
lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java (revision 1171976) +++ lucene/src/java/org/apache/lucene/search/TopScoreDocCollector.java (working copy) @@ -67,7 +67,68 @@ return false; } } + + // Assumes docs are scored in order. + private static class InOrderPagingScoreDocCollector extends TopScoreDocCollector { + private final ScoreDoc after; + // this is always after.doc - docBase, to save an add when score == after.score + private int afterDoc; + private int collectedHits; + private InOrderPagingScoreDocCollector(ScoreDoc after, int numHits) { + super(numHits); + this.after = after; + } + + @Override + public void collect(int doc) throws IOException { + float score = scorer.score(); + + // This collector cannot handle these scores: + assert score != Float.NEGATIVE_INFINITY; + assert !Float.isNaN(score); + + totalHits++; + + if (score > after.score || (score == after.score && doc <= afterDoc)) { + // hit was collected on a previous page + return; + } + + if (score <= pqTop.score) { + // Since docs are returned in-order (i.e., increasing doc Id), a document + // with equal score to pqTop.score cannot compete since HitQueue favors + // documents with lower doc Ids. Therefore reject those docs too. + return; + } + collectedHits++; + pqTop.doc = doc + docBase; + pqTop.score = score; + pqTop = pq.updateTop(); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + + @Override + public void setNextReader(AtomicReaderContext context) { + super.setNextReader(context); + afterDoc = after.doc - docBase; + } + + @Override + protected int topDocsSize() { + return collectedHits < pq.size() ? collectedHits : pq.size(); + } + + @Override + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results); + } + } + // Assumes docs are scored out of order. 
private static class OutOfOrderTopScoreDocCollector extends TopScoreDocCollector { private OutOfOrderTopScoreDocCollector(int numHits) { @@ -101,7 +162,68 @@ return true; } } + + // Assumes docs are scored out of order. + private static class OutOfOrderPagingScoreDocCollector extends TopScoreDocCollector { + private final ScoreDoc after; + // this is always after.doc - docBase, to save an add when score == after.score + private int afterDoc; + private int collectedHits; + private OutOfOrderPagingScoreDocCollector(ScoreDoc after, int numHits) { + super(numHits); + this.after = after; + } + + @Override + public void collect(int doc) throws IOException { + float score = scorer.score(); + + // This collector cannot handle NaN + assert !Float.isNaN(score); + + totalHits++; + if (score > after.score || (score == after.score && doc <= afterDoc)) { + // hit was collected on a previous page + return; + } + if (score < pqTop.score) { + // Doesn't compete w/ bottom entry in queue + return; + } + doc += docBase; + if (score == pqTop.score && doc > pqTop.doc) { + // Break tie in score by doc ID: + return; + } + collectedHits++; + pqTop.doc = doc; + pqTop.score = score; + pqTop = pq.updateTop(); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public void setNextReader(AtomicReaderContext context) { + super.setNextReader(context); + afterDoc = after.doc - docBase; + } + + @Override + protected int topDocsSize() { + return collectedHits < pq.size() ? collectedHits : pq.size(); + } + + @Override + protected TopDocs newTopDocs(ScoreDoc[] results, int start) { + return results == null ? new TopDocs(totalHits, new ScoreDoc[0], Float.NaN) : new TopDocs(totalHits, results); + } + } + /** * Creates a new {@link TopScoreDocCollector} given the number of hits to * collect and whether documents are scored in order by the input @@ -113,15 +235,33 @@ * objects. 
*/ public static TopScoreDocCollector create(int numHits, boolean docsScoredInOrder) { + return create(numHits, null, docsScoredInOrder); + } + + /** + * Creates a new {@link TopScoreDocCollector} given the number of hits to + * collect, the bottom of the previous page, and whether documents are scored in order by the input + * {@link Scorer} to {@link #setScorer(Scorer)}. + * + *
NOTE: The instances returned by this method
+ * pre-allocate a full array of length
+ * numHits, and fill the array with sentinel
+ * objects.
+ */
+ public static TopScoreDocCollector create(int numHits, ScoreDoc after, boolean docsScoredInOrder) {
if (numHits <= 0) {
throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
}
-
+
if (docsScoredInOrder) {
- return new InOrderTopScoreDocCollector(numHits);
+ return after == null
+ ? new InOrderTopScoreDocCollector(numHits)
+ : new InOrderPagingScoreDocCollector(after, numHits);
} else {
- return new OutOfOrderTopScoreDocCollector(numHits);
+ return after == null
+ ? new OutOfOrderTopScoreDocCollector(numHits)
+ : new OutOfOrderPagingScoreDocCollector(after, numHits);
}
}
Index: lucene/src/java/org/apache/lucene/search/TopDocsCollector.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TopDocsCollector.java (revision 1171976)
+++ lucene/src/java/org/apache/lucene/search/TopDocsCollector.java (working copy)
@@ -77,12 +77,20 @@
return totalHits;
}
+ /** The number of valid PQ entries */
+ protected int topDocsSize() {
+ // In case pq was populated with sentinel values, there might be less
+ // results than pq.size(). Therefore return all results until either
+ // pq.size() or totalHits.
+ return totalHits < pq.size() ? totalHits : pq.size();
+ }
+
/** Returns the top docs that were collected by this collector. */
public TopDocs topDocs() {
// In case pq was populated with sentinel values, there might be less
// results than pq.size(). Therefore return all results until either
// pq.size() or totalHits.
- return topDocs(0, totalHits < pq.size() ? totalHits : pq.size());
+ return topDocs(0, topDocsSize());
}
/**
@@ -101,7 +109,7 @@
// In case pq was populated with sentinel values, there might be less
// results than pq.size(). Therefore return all results until either
// pq.size() or totalHits.
- return topDocs(start, totalHits < pq.size() ? totalHits : pq.size());
+ return topDocs(start, topDocsSize());
}
/**
@@ -123,10 +131,12 @@
// In case pq was populated with sentinel values, there might be less
// results than pq.size(). Therefore return all results until either
// pq.size() or totalHits.
- int size = totalHits < pq.size() ? totalHits : pq.size();
+ int size = topDocsSize();
// Don't bother to throw an exception, just return an empty TopDocs in case
// the parameters are invalid or out of range.
+ // TODO: shouldn't we throw IAE if apps give bad params here so they don't
+ // have sneaky silent bugs?
if (start < 0 || start >= size || howMany <= 0) {
return newTopDocs(null, start);
}
Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1171976)
+++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy)
@@ -276,6 +276,34 @@
}
/** Finds the top n
+ * hits for query,
+ * where all results are after a previous result (after).
+ *
+ * By passing the bottom result from a previous page as after,
+ * this method can be used for efficient 'deep-paging' across potentially
+ * large result sets.
+ *
+ * @throws BooleanQuery.TooManyClauses
+ */
+ public TopDocs searchAfter(ScoreDoc after, Query query, int n) throws IOException {
+ return searchAfter(after, query, null, n);
+ }
+
+ /** Finds the top n
+ * hits for query, applying filter if non-null,
+ * where all results are after a previous result (after).
+ *
+ * By passing the bottom result from a previous page as after,
+ * this method can be used for efficient 'deep-paging' across potentially
+ * large result sets.
+ *
+ * @throws BooleanQuery.TooManyClauses
+ */
+ public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n) throws IOException {
+ return search(createNormalizedWeight(query), filter, after, n);
+ }
+
+ /** Finds the top n
* hits for query.
*
* @throws BooleanQuery.TooManyClauses
@@ -293,7 +321,7 @@
*/
public TopDocs search(Query query, Filter filter, int n)
throws IOException {
- return search(createNormalizedWeight(query), filter, n);
+ return search(createNormalizedWeight(query), filter, null, n);
}
/** Lower-level search API.
@@ -371,9 +399,9 @@
* {@link IndexSearcher#search(Query,Filter,int)} instead.
* @throws BooleanQuery.TooManyClauses
*/
- protected TopDocs search(Weight weight, Filter filter, int nDocs) throws IOException {
+ protected TopDocs search(Weight weight, Filter filter, ScoreDoc after, int nDocs) throws IOException {
if (executor == null) {
- return search(leafContexts, weight, filter, nDocs);
+ return search(leafContexts, weight, filter, after, nDocs);
} else {
final HitQueue hq = new HitQueue(nDocs, false);
final Lock lock = new ReentrantLock();
@@ -381,7 +409,7 @@
for (int i = 0; i < leafSlices.length; i++) { // search each sub
runner.submit(
- new SearcherCallableNoSort(lock, this, leafSlices[i], weight, filter, nDocs, hq));
+ new SearcherCallableNoSort(lock, this, leafSlices[i], weight, filter, after, nDocs, hq));
}
int totalHits = 0;
@@ -408,14 +436,14 @@
* {@link IndexSearcher#search(Query,Filter,int)} instead.
* @throws BooleanQuery.TooManyClauses
*/
- protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs) throws IOException {
+ protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, ScoreDoc after, int nDocs) throws IOException {
// single thread
int limit = reader.maxDoc();
if (limit == 0) {
limit = 1;
}
nDocs = Math.min(nDocs, limit);
- TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, !weight.scoresDocsOutOfOrder());
+ TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, after, !weight.scoresDocsOutOfOrder());
search(leaves, weight, filter, collector);
return collector.topDocs();
}
@@ -704,23 +732,25 @@
private final IndexSearcher searcher;
private final Weight weight;
private final Filter filter;
+ private final ScoreDoc after;
private final int nDocs;
private final HitQueue hq;
private final LeafSlice slice;
public SearcherCallableNoSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight,
- Filter filter, int nDocs, HitQueue hq) {
+ Filter filter, ScoreDoc after, int nDocs, HitQueue hq) {
this.lock = lock;
this.searcher = searcher;
this.weight = weight;
this.filter = filter;
+ this.after = after;
this.nDocs = nDocs;
this.hq = hq;
this.slice = slice;
}
public TopDocs call() throws IOException {
- final TopDocs docs = searcher.search (slice.leaves, weight, filter, nDocs);
+ final TopDocs docs = searcher.search (slice.leaves, weight, filter, after, nDocs);
final ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
final ScoreDoc scoreDoc = scoreDocs[j];