Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 797728) +++ CHANGES.txt (working copy) @@ -337,6 +337,16 @@ made public as expert, experimental APIs. These APIs may suddenly change from release to release (Jason Rutherglen via Mike McCandless). + +32. LUCENE-1754: QueryWeight.scorer() can return null if no documents + are going to be matched by the query. Similarly, + Filter.getDocIdSet() can return null if no documents are going to + be accepted by the Filter. Note that these 'can' return null, + however they don't have to and can return a Scorer/DocIdSet which + does not match / reject all documents. This is already the + behavior of some QueryWeight/Filter implementations, and is + documented here just for emphasis. (Shai Erera via Mike + McCandless) Bug fixes @@ -663,6 +673,11 @@ Removes conversions between Set and array. (Simon Willnauer via Mark Miller) +11. LUCENE-1754: BooleanQuery.queryWeight.scorer() will return null if + it won't match any documents (e.g. if there are no required and + optional scorers, or not enough optional scorers to satisfy + minShouldMatch). (Shai Erera via Mike McCandless) + Documentation Build Index: src/test/org/apache/lucene/search/TestDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/TestDocIdSet.java (revision 797728) +++ src/test/org/apache/lucene/search/TestDocIdSet.java (working copy) @@ -22,7 +22,20 @@ import java.util.Arrays; import java.util.Iterator; +import junit.framework.Assert; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSetDISI; import org.apache.lucene.util._TestUtil; public class TestDocIdSet extends LuceneTestCase { @@ -102,4 +115,30 @@ fail(); } } + + public void testNullDocIdSet() throws Exception { + // Tests that if a Filter produces a null DocIdSet, which is given to + // IndexSearcher, everything works fine. This came up in LUCENE-1754. + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED); + Document doc = new Document(); + doc.add(new Field("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + writer.addDocument(doc); + writer.close(); + + // First verify the document is searchable. + IndexSearcher searcher = new IndexSearcher(dir, true); + Assert.assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits); + + // Now search w/ a Filter which returns a null DocIdSet + Filter f = new Filter() { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return null; + } + }; + + Assert.assertEquals(0, searcher.search(new MatchAllDocsQuery(), f, 10).totalHits); + searcher.close(); + } + } Index: src/test/org/apache/lucene/search/QueryUtils.java =================================================================== --- src/test/org/apache/lucene/search/QueryUtils.java (revision 797728) +++ src/test/org/apache/lucene/search/QueryUtils.java (working copy) @@ -152,7 +152,10 @@ final QueryWeight w = q.queryWeight(s); final Scorer scorer = w.scorer(s.getIndexReader(), true, false); - + if (scorer == null) { + continue; + } + // FUTURE: ensure scorer.doc()==-1 final int[] sdoc = new int[] {-1}; @@ -253,8 +256,10 @@ }); QueryWeight w = q.queryWeight(s); Scorer scorer = w.scorer(s.getIndexReader(), true, false); - boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; - if (more) - Assert.assertFalse("query's last doc was "+lastDoc[0]+" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + if (more) + Assert.assertFalse("query's last doc was "+lastDoc[0]+" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } } } Index: src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -113,7 +113,17 @@ public ConstantScorer(Similarity similarity, IndexReader reader, QueryWeight w) throws IOException { super(similarity); theScore = w.getValue(); - docIdSetIterator = filter.getDocIdSet(reader).iterator(); + DocIdSet docIdSet = filter.getDocIdSet(reader); + if (docIdSet == null) { + docIdSetIterator = EmptyDocIdSetIterator.getInstance(); + } else { + DocIdSetIterator iter = docIdSet.iterator(); + if (iter == null) { + docIdSetIterator = EmptyDocIdSetIterator.getInstance(); + } else { + docIdSetIterator = iter; + } + } } /** @deprecated use {@link #nextDoc()} instead. */ Index: src/java/org/apache/lucene/search/BooleanScorer2.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer2.java (revision 797728) +++ src/java/org/apache/lucene/search/BooleanScorer2.java (working copy) @@ -218,36 +218,26 @@ } private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers - if (optionalScorers.size() == 0) { - return new NonMatchingScorer(); // no clauses or only prohibited clauses - } else { // No required scorers. At least one optional scorer. - // minNrShouldMatch optional scorers are required, but at least 1 - int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; - if (optionalScorers.size() < nrOptRequired) { - return new NonMatchingScorer(); // fewer optional clauses than minimum (at least 1) that should match - } else { // optionalScorers.size() >= nrOptRequired, no required scorers - Scorer requiredCountingSumScorer = - (optionalScorers.size() > nrOptRequired) - ? countingDisjunctionSumScorer(optionalScorers, nrOptRequired) - : // optionalScorers.size() == nrOptRequired (all optional scorers are required), no required scorers - (optionalScorers.size() == 1) - ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) - : countingConjunctionSumScorer(optionalScorers); - return addProhibitedScorers(requiredCountingSumScorer); - } - } + // minNrShouldMatch optional scorers are required, but at least 1 + int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; + Scorer requiredCountingSumScorer; + if (optionalScorers.size() > nrOptRequired) + requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); + else if (optionalScorers.size() == 1) + requiredCountingSumScorer = new SingleMatchScorer((Scorer) optionalScorers.get(0)); + else + requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); + return addProhibitedScorers(requiredCountingSumScorer); } private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer. - if (optionalScorers.size() < minNrShouldMatch) { - return new NonMatchingScorer(); // fewer optional clauses than minimum that should match - } else if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. + if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. ArrayList allReq = new ArrayList(requiredScorers); allReq.addAll(optionalScorers); return addProhibitedScorers(countingConjunctionSumScorer(allReq)); } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer Scorer requiredCountingSumScorer = - (requiredScorers.size() == 1) + requiredScorers.size() == 1 ? new SingleMatchScorer((Scorer) requiredScorers.get(0)) : countingConjunctionSumScorer(requiredScorers); if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers @@ -260,9 +250,10 @@ } else { // minNrShouldMatch == 0 return new ReqOptSumScorer( addProhibitedScorers(requiredCountingSumScorer), - ((optionalScorers.size() == 1) + optionalScorers.size() == 1 ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) - : countingDisjunctionSumScorer(optionalScorers, 1))); // require 1 in combined, optional scorer. + // require 1 in combined, optional scorer. + : countingDisjunctionSumScorer(optionalScorers, 1)); } } } Index: src/java/org/apache/lucene/search/Filter.java =================================================================== --- src/java/org/apache/lucene/search/Filter.java (revision 797728) +++ src/java/org/apache/lucene/search/Filter.java (working copy) @@ -39,10 +39,12 @@ public BitSet bits(IndexReader reader) throws IOException { throw new UnsupportedOperationException(); } - + /** - * @return a DocIdSet that provides the documents which should be - * permitted or prohibited in search results. + * @return a DocIdSet that provides the documents which should be permitted or + * prohibited in search results. NOTE: null can be returned if + * no documents will be accepted by this Filter. + * * @see DocIdBitSet */ public DocIdSet getDocIdSet(IndexReader reader) throws IOException { Index: src/java/org/apache/lucene/search/QueryWeight.java =================================================================== --- src/java/org/apache/lucene/search/QueryWeight.java (revision 797728) +++ src/java/org/apache/lucene/search/QueryWeight.java (working copy) @@ -75,8 +75,11 @@ *
* NOTE: even if scoreDocsInOrder is false, it is
* recommended to check whether the returned Scorer indeed scores
- * documents out of order (i.e., call {@link #scoresDocsOutOfOrder()}), as some
- * Scorer implementations will always return documents in-order.
+ * documents out of order (i.e., call {@link #scoresDocsOutOfOrder()}), as
+ * some Scorer implementations will always return documents
+ * in-order.
+ * NOTE: null can be returned if no documents will be scored by this
+ * query.
*
* @param reader
* the {@link IndexReader} for which to return the {@link Scorer}.
Index: src/java/org/apache/lucene/search/MultiPhraseQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 797728)
+++ src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy)
@@ -217,7 +217,11 @@
fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
"), product of:");
- Explanation tfExpl = scorer(reader, true, false).explain(doc);
+ Scorer scorer = scorer(reader, true, false);
+ if (scorer == null) {
+ return new Explanation(0.0f, "no matching docs");
+ }
+ Explanation tfExpl = scorer.explain(doc);
fieldExpl.addDetail(tfExpl);
fieldExpl.addDetail(idfExpl);
Index: src/java/org/apache/lucene/search/FilteredQuery.java
===================================================================
--- src/java/org/apache/lucene/search/FilteredQuery.java (revision 797728)
+++ src/java/org/apache/lucene/search/FilteredQuery.java (working copy)
@@ -82,7 +82,11 @@
inner.addDetail(preBoost);
}
Filter f = FilteredQuery.this.filter;
- DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator();
+ DocIdSet docIdSet = f.getDocIdSet(ir);
+ DocIdSetIterator docIdSetIterator = docIdSet == null ? EmptyDocIdSetIterator.getInstance() : docIdSet.iterator();
+ if (docIdSetIterator == null) {
+ docIdSetIterator = EmptyDocIdSetIterator.getInstance();
+ }
if (docIdSetIterator.advance(i) == i) {
return inner;
} else {
@@ -100,7 +104,17 @@
public Scorer scorer(IndexReader indexReader, boolean scoreDocsInOrder, boolean topScorer)
throws IOException {
final Scorer scorer = weight.scorer(indexReader, scoreDocsInOrder, false);
- final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator();
+ if (scorer == null) {
+ return null;
+ }
+ DocIdSet docIdSet = filter.getDocIdSet(indexReader);
+ if (docIdSet == null) {
+ return null;
+ }
+ final DocIdSetIterator docIdSetIterator = docIdSet.iterator();
+ if (docIdSetIterator == null) {
+ return null;
+ }
return new Scorer(similarity) {
Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 797728)
+++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy)
@@ -100,12 +100,13 @@
final DocIdSet docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader);
- synchronized (cache) { // update cache
- cache.put(reader, docIdSet);
+ if (docIdSet != null) {
+ synchronized (cache) { // update cache
+ cache.put(reader, docIdSet);
+ }
}
return docIdSet;
-
}
public String toString() {
Index: src/java/org/apache/lucene/search/function/CustomScoreQuery.java
===================================================================
--- src/java/org/apache/lucene/search/function/CustomScoreQuery.java (revision 797728)
+++ src/java/org/apache/lucene/search/function/CustomScoreQuery.java (working copy)
@@ -331,6 +331,9 @@
// false for "topScorer" because we will not invoke
// score(Collector) on these scorers:
Scorer subQueryScorer = subQueryWeight.scorer(reader, true, false);
+ if (subQueryScorer == null) {
+ return null;
+ }
Scorer[] valSrcScorers = new Scorer[valSrcWeights.length];
for(int i = 0; i < valSrcScorers.length; i++) {
valSrcScorers[i] = valSrcWeights[i].scorer(reader, true, false);
@@ -339,7 +342,8 @@
}
public Explanation explain(IndexReader reader, int doc) throws IOException {
- return scorer(reader, true, false).explain(doc);
+ Scorer scorer = scorer(reader, true, false);
+ return scorer == null ? new Explanation(0.0f, "no matching docs") : scorer.explain(doc);
}
public boolean scoresDocsOutOfOrder() {
Index: src/java/org/apache/lucene/search/BooleanQuery.java
===================================================================
--- src/java/org/apache/lucene/search/BooleanQuery.java (revision 797728)
+++ src/java/org/apache/lucene/search/BooleanQuery.java (working copy)
@@ -227,9 +227,12 @@
float sum = 0.0f;
boolean fail = false;
int shouldMatchCount = 0;
- for (int i = 0 ; i < weights.size(); i++) {
- BooleanClause c = (BooleanClause)clauses.get(i);
- QueryWeight w = (QueryWeight)weights.get(i);
+ for (Iterator wIter = weights.iterator(), cIter = clauses.iterator(); wIter.hasNext();) {
+ QueryWeight w = (QueryWeight) wIter.next();
+ BooleanClause c = (BooleanClause) cIter.next();
+ if (w.scorer(reader, true, true) == null) {
+ continue;
+ }
Explanation e = w.explain(reader, doc);
if (!c.isProhibited()) maxCoord++;
if (e.isMatch()) {
@@ -244,7 +247,7 @@
sumExpl.addDetail(r);
fail = true;
}
- if (c.getOccur().equals(Occur.SHOULD))
+ if (c.getOccur() == Occur.SHOULD)
shouldMatchCount++;
} else if (c.isRequired()) {
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
@@ -312,6 +315,16 @@
return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited);
}
+ if (required.size() == 0 && optional.size() == 0) {
+ // no required and optional clauses.
+ return null;
+ } else if (optional.size() < minNrShouldMatch) {
+ // either >1 req scorer, or there are 0 req scorers and at least 1
+ // optional scorer. Therefore if there are not enough optional scorers
+ // no documents will be matched by the query
+ return null;
+ }
+
// Return a BooleanScorer2
return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional);
}
Index: src/java/org/apache/lucene/search/PhraseQuery.java
===================================================================
--- src/java/org/apache/lucene/search/PhraseQuery.java (revision 797728)
+++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy)
@@ -209,7 +209,11 @@
fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
"), product of:");
- Explanation tfExpl = scorer(reader, true, false).explain(doc);
+ Scorer scorer = scorer(reader, true, false);
+ if (scorer == null) {
+ return new Explanation(0.0f, "no matching docs");
+ }
+ Explanation tfExpl = scorer.explain(doc);
fieldExpl.addDetail(tfExpl);
fieldExpl.addDetail(idfExpl);
Index: src/java/org/apache/lucene/search/DocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/DocIdSet.java (revision 797728)
+++ src/java/org/apache/lucene/search/DocIdSet.java (working copy)
@@ -30,6 +30,8 @@
* implemented using a {@link SortedVIntList}). */
public static final DocIdSet EMPTY_DOCIDSET = new SortedVIntList(new int[0]);
- /** Provides a {@link DocIdSetIterator} to access the set. */
+ /** Provides a {@link DocIdSetIterator} to access the set.
+ * This may (but is not required to) return null if there
+ * are no docs that match. */
public abstract DocIdSetIterator iterator() throws IOException;
}
Index: src/java/org/apache/lucene/search/NonMatchingScorer.java
===================================================================
--- src/java/org/apache/lucene/search/NonMatchingScorer.java (revision 797728)
+++ src/java/org/apache/lucene/search/NonMatchingScorer.java (working copy)
@@ -1,50 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-/** A scorer that matches no document at all. */
-class NonMatchingScorer extends Scorer {
- public NonMatchingScorer() { super(null); } // no similarity used
-
- /** @deprecated use {@link #docID()} instead. */
- public int doc() { throw new UnsupportedOperationException(); }
-
- public int docID() { return NO_MORE_DOCS; }
-
- /** @deprecated use {@link #nextDoc()} instead. */
- public boolean next() throws IOException { return false; }
-
- public int nextDoc() throws IOException { return NO_MORE_DOCS; }
-
- public float score() { throw new UnsupportedOperationException(); }
-
- /** @deprecated use {@link #advance(int)} instead. */
- public boolean skipTo(int target) { return false; }
-
- public int advance(int target) { return NO_MORE_DOCS; }
-
- public Explanation explain(int doc) {
- Explanation e = new Explanation();
- e.setDescription("No document matches.");
- return e;
- }
-}
-
-
Index: src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/IndexSearcher.java (revision 797728)
+++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy)
@@ -267,8 +267,17 @@
assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
// CHECKME: use ConjunctionScorer here?
- DocIdSetIterator filterIter = filter.getDocIdSet(reader).iterator();
+ DocIdSet filterDocIdSet = filter.getDocIdSet(reader);
+ if (filterDocIdSet == null) {
+ // this means the filter does not accept any documents.
+ return;
+ }
+ DocIdSetIterator filterIter = filterDocIdSet.iterator();
+ if (filterIter == null) {
+ // this means the filter does not accept any documents.
+ return;
+ }
int filterDoc = filterIter.nextDoc();
int scorerDoc = scorer.advance(filterDoc);
Index: src/java/org/apache/lucene/index/DocumentsWriter.java
===================================================================
--- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 797728)
+++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy)
@@ -1007,12 +1007,14 @@
int limit = ((Integer) entry.getValue()).intValue();
QueryWeight weight = query.queryWeight(searcher);
Scorer scorer = weight.scorer(reader, true, false);
- while(true) {
- int doc = scorer.nextDoc();
- if (((long) docIDStart) + doc >= limit)
- break;
- reader.deleteDocument(doc);
- any = true;
+ if (scorer != null) {
+ while(true) {
+ int doc = scorer.nextDoc();
+ if (((long) docIDStart) + doc >= limit)
+ break;
+ reader.deleteDocument(doc);
+ any = true;
+ }
}
}
searcher.close();
Index: contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java
===================================================================
--- contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java (revision 797728)
+++ contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java (working copy)
@@ -60,6 +60,7 @@
import java.io.IOException;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.EmptyDocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.util.SortedVIntList;
@@ -146,9 +147,18 @@
}
private DocIdSetIterator getDISI(Filter filter, IndexReader reader)
- throws IOException
- {
- return filter.getDocIdSet(reader).iterator();
+ throws IOException {
+ DocIdSet docIdSet = filter.getDocIdSet(reader);
+ if (docIdSet == null) {
+ return EmptyDocIdSetIterator.getInstance();
+ } else {
+ DocIdSetIterator iter = docIdSet.iterator();
+ if (iter == null) {
+ return EmptyDocIdSetIterator.getInstance();
+ } else {
+ return iter;
+ }
+ }
}
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
@@ -241,13 +251,11 @@
}
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
- throws IOException
- {
+ throws IOException {
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
- switch (logic)
- {
+ switch (logic) {
case OR:
result.or((OpenBitSet) dis);
break;
@@ -265,9 +273,17 @@
break;
}
} else {
- DocIdSetIterator disi = dis.iterator();
- switch (logic)
- {
+ DocIdSetIterator disi;
+ if (dis == null) {
+ disi = EmptyDocIdSetIterator.getInstance();
+ } else {
+ disi = dis.iterator();
+ if (disi == null) {
+ disi = EmptyDocIdSetIterator.getInstance();
+ }
+ }
+
+ switch (logic) {
case OR:
result.inPlaceOr(disi);
break;