Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 797728) +++ CHANGES.txt (working copy) @@ -337,6 +337,16 @@ made public as expert, experimental APIs. These APIs may suddenly change from release to release (Jason Rutherglen via Mike McCandless). + +32. LUCENE-1754: QueryWeight.scorer() can return null if no documents + are going to be matched by the query. Similarly, + Filter.getDocIdSet() can return null if no documents are going to + be accepted by the Filter. Note that these 'can' return null, + however they don't have to and can return a Scorer/DocIdSet which + does not match / reject all documents. This is already the + behavior of some QueryWeight/Filter implementations, and is + documented here just for emphasis. (Shai Erera via Mike + McCandless) Bug fixes @@ -663,6 +673,11 @@ Removes conversions between Set and array. (Simon Willnauer via Mark Miller) +11. LUCENE-1754: BooleanQuery.queryWeight.scorer() will return null if + it won't match any documents (e.g. if there are no required and + optional scorers, or not enough optional scorers to satisfy + minShouldMatch). (Shai Erera via Mike McCandless) + Documentation Build Index: src/test/org/apache/lucene/search/TestDocIdSet.java =================================================================== --- src/test/org/apache/lucene/search/TestDocIdSet.java (revision 797728) +++ src/test/org/apache/lucene/search/TestDocIdSet.java (working copy) @@ -22,7 +22,20 @@ import java.util.Arrays; import java.util.Iterator; +import junit.framework.Assert; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Index; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriter.MaxFieldLength; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.OpenBitSetDISI; import org.apache.lucene.util._TestUtil; public class TestDocIdSet extends LuceneTestCase { @@ -102,4 +115,30 @@ fail(); } } + + public void testNullDocIdSet() throws Exception { + // Tests that if a Filter produces a null DocIdSet, which is given to + // IndexSearcher, everything works fine. This came up in LUCENE-1754. + Directory dir = new RAMDirectory(); + IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), MaxFieldLength.UNLIMITED); + Document doc = new Document(); + doc.add(new Field("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + writer.addDocument(doc); + writer.close(); + + // First verify the document is searchable. + IndexSearcher searcher = new IndexSearcher(dir, true); + Assert.assertEquals(1, searcher.search(new MatchAllDocsQuery(), 10).totalHits); + + // Now search w/ a Filter which returns a null DocIdSet + Filter f = new Filter() { + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + return null; + } + }; + + Assert.assertEquals(0, searcher.search(new MatchAllDocsQuery(), f, 10).totalHits); + searcher.close(); + } + } Index: src/test/org/apache/lucene/search/QueryUtils.java =================================================================== --- src/test/org/apache/lucene/search/QueryUtils.java (revision 797728) +++ src/test/org/apache/lucene/search/QueryUtils.java (working copy) @@ -152,7 +152,10 @@ final QueryWeight w = q.queryWeight(s); final Scorer scorer = w.scorer(s.getIndexReader(), true, false); - + if (scorer == null) { + continue; + } + // FUTURE: ensure scorer.doc()==-1 final int[] sdoc = new int[] {-1}; @@ -253,8 +256,10 @@ }); QueryWeight w = q.queryWeight(s); Scorer scorer = w.scorer(s.getIndexReader(), true, false); - boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; - if (more) - Assert.assertFalse("query's last doc was "+lastDoc[0]+" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + if (scorer != null) { + boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS; + if (more) + Assert.assertFalse("query's last doc was "+lastDoc[0]+" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more); + } } } Index: src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -113,7 +113,17 @@ public ConstantScorer(Similarity similarity, IndexReader reader, QueryWeight w) throws IOException { super(similarity); theScore = w.getValue(); - docIdSetIterator = filter.getDocIdSet(reader).iterator(); + DocIdSet docIdSet = filter.getDocIdSet(reader); + if (docIdSet == null) { + docIdSetIterator = EmptyDocIdSetIterator.getInstance(); + } else { + DocIdSetIterator iter = docIdSet.iterator(); + if (iter == null) { + docIdSetIterator = EmptyDocIdSetIterator.getInstance(); + } else { + docIdSetIterator = iter; + } + } } /** @deprecated use {@link #nextDoc()} instead. */ Index: src/java/org/apache/lucene/search/BooleanScorer2.java =================================================================== --- src/java/org/apache/lucene/search/BooleanScorer2.java (revision 797728) +++ src/java/org/apache/lucene/search/BooleanScorer2.java (working copy) @@ -218,36 +218,26 @@ } private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers - if (optionalScorers.size() == 0) { - return new NonMatchingScorer(); // no clauses or only prohibited clauses - } else { // No required scorers. At least one optional scorer. - // minNrShouldMatch optional scorers are required, but at least 1 - int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; - if (optionalScorers.size() < nrOptRequired) { - return new NonMatchingScorer(); // fewer optional clauses than minimum (at least 1) that should match - } else { // optionalScorers.size() >= nrOptRequired, no required scorers - Scorer requiredCountingSumScorer = - (optionalScorers.size() > nrOptRequired) - ? countingDisjunctionSumScorer(optionalScorers, nrOptRequired) - : // optionalScorers.size() == nrOptRequired (all optional scorers are required), no required scorers - (optionalScorers.size() == 1) - ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) - : countingConjunctionSumScorer(optionalScorers); - return addProhibitedScorers(requiredCountingSumScorer); - } - } + // minNrShouldMatch optional scorers are required, but at least 1 + int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch; + Scorer requiredCountingSumScorer; + if (optionalScorers.size() > nrOptRequired) + requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); + else if (optionalScorers.size() == 1) + requiredCountingSumScorer = new SingleMatchScorer((Scorer) optionalScorers.get(0)); + else + requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); + return addProhibitedScorers(requiredCountingSumScorer); } private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer. - if (optionalScorers.size() < minNrShouldMatch) { - return new NonMatchingScorer(); // fewer optional clauses than minimum that should match - } else if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. + if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. ArrayList allReq = new ArrayList(requiredScorers); allReq.addAll(optionalScorers); return addProhibitedScorers(countingConjunctionSumScorer(allReq)); } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer Scorer requiredCountingSumScorer = - (requiredScorers.size() == 1) + requiredScorers.size() == 1 ? new SingleMatchScorer((Scorer) requiredScorers.get(0)) : countingConjunctionSumScorer(requiredScorers); if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers @@ -260,9 +250,10 @@ } else { // minNrShouldMatch == 0 return new ReqOptSumScorer( addProhibitedScorers(requiredCountingSumScorer), - ((optionalScorers.size() == 1) + optionalScorers.size() == 1 ? new SingleMatchScorer((Scorer) optionalScorers.get(0)) - : countingDisjunctionSumScorer(optionalScorers, 1))); // require 1 in combined, optional scorer. + // require 1 in combined, optional scorer. + : countingDisjunctionSumScorer(optionalScorers, 1)); } } } Index: src/java/org/apache/lucene/search/Filter.java =================================================================== --- src/java/org/apache/lucene/search/Filter.java (revision 797728) +++ src/java/org/apache/lucene/search/Filter.java (working copy) @@ -39,10 +39,12 @@ public BitSet bits(IndexReader reader) throws IOException { throw new UnsupportedOperationException(); } - + /** - * @return a DocIdSet that provides the documents which should be - * permitted or prohibited in search results. + * @return a DocIdSet that provides the documents which should be permitted or + * prohibited in search results. NOTE: null can be returned if + * no documents will be accepted by this Filter. + * * @see DocIdBitSet */ public DocIdSet getDocIdSet(IndexReader reader) throws IOException { Index: src/java/org/apache/lucene/search/QueryWeight.java =================================================================== --- src/java/org/apache/lucene/search/QueryWeight.java (revision 797728) +++ src/java/org/apache/lucene/search/QueryWeight.java (working copy) @@ -75,8 +75,11 @@ *

* NOTE: even if scoreDocsInOrder is false, it is * recommended to check whether the returned Scorer indeed scores - * documents out of order (i.e., call {@link #scoresDocsOutOfOrder()}), as some - * Scorer implementations will always return documents in-order. + * documents out of order (i.e., call {@link #scoresDocsOutOfOrder()}), as + * some Scorer implementations will always return documents + * in-order.
+ * NOTE: null can be returned if no documents will be scored by this + * query. * * @param reader * the {@link IndexReader} for which to return the {@link Scorer}. Index: src/java/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) @@ -217,7 +217,11 @@ fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+ "), product of:"); - Explanation tfExpl = scorer(reader, true, false).explain(doc); + Scorer scorer = scorer(reader, true, false); + if (scorer == null) { + return new Explanation(0.0f, "no matching docs"); + } + Explanation tfExpl = scorer.explain(doc); fieldExpl.addDetail(tfExpl); fieldExpl.addDetail(idfExpl); Index: src/java/org/apache/lucene/search/FilteredQuery.java =================================================================== --- src/java/org/apache/lucene/search/FilteredQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/FilteredQuery.java (working copy) @@ -82,7 +82,11 @@ inner.addDetail(preBoost); } Filter f = FilteredQuery.this.filter; - DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator(); + DocIdSet docIdSet = f.getDocIdSet(ir); + DocIdSetIterator docIdSetIterator = docIdSet == null ? EmptyDocIdSetIterator.getInstance() : docIdSet.iterator(); + if (docIdSetIterator == null) { + docIdSetIterator = EmptyDocIdSetIterator.getInstance(); + } if (docIdSetIterator.advance(i) == i) { return inner; } else { @@ -100,7 +104,17 @@ public Scorer scorer(IndexReader indexReader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { final Scorer scorer = weight.scorer(indexReader, scoreDocsInOrder, false); - final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator(); + if (scorer == null) { + return null; + } + DocIdSet docIdSet = filter.getDocIdSet(indexReader); + if (docIdSet == null) { + return null; + } + final DocIdSetIterator docIdSetIterator = docIdSet.iterator(); + if (docIdSetIterator == null) { + return null; + } return new Scorer(similarity) { Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java =================================================================== --- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 797728) +++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy) @@ -100,12 +100,13 @@ final DocIdSet docIdSet = docIdSetToCache(filter.getDocIdSet(reader), reader); - synchronized (cache) { // update cache - cache.put(reader, docIdSet); + if (docIdSet != null) { + synchronized (cache) { // update cache + cache.put(reader, docIdSet); + } } return docIdSet; - } public String toString() { Index: src/java/org/apache/lucene/search/function/CustomScoreQuery.java =================================================================== --- src/java/org/apache/lucene/search/function/CustomScoreQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/function/CustomScoreQuery.java (working copy) @@ -331,6 +331,9 @@ // false for "topScorer" because we will not invoke // score(Collector) on these scorers: Scorer subQueryScorer = subQueryWeight.scorer(reader, true, false); + if (subQueryScorer == null) { + return null; + } Scorer[] valSrcScorers = new Scorer[valSrcWeights.length]; for(int i = 0; i < valSrcScorers.length; i++) { valSrcScorers[i] = valSrcWeights[i].scorer(reader, true, false); @@ -339,7 +342,8 @@ } public Explanation explain(IndexReader reader, int doc) throws IOException { - return scorer(reader, true, false).explain(doc); + Scorer scorer = scorer(reader, true, false); + return scorer == null ? new Explanation(0.0f, "no matching docs") : scorer.explain(doc); } public boolean scoresDocsOutOfOrder() { Index: src/java/org/apache/lucene/search/BooleanQuery.java =================================================================== --- src/java/org/apache/lucene/search/BooleanQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/BooleanQuery.java (working copy) @@ -227,9 +227,12 @@ float sum = 0.0f; boolean fail = false; int shouldMatchCount = 0; - for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - QueryWeight w = (QueryWeight)weights.get(i); + for (Iterator wIter = weights.iterator(), cIter = clauses.iterator(); wIter.hasNext();) { + QueryWeight w = (QueryWeight) wIter.next(); + BooleanClause c = (BooleanClause) cIter.next(); + if (w.scorer(reader, true, true) == null) { + continue; + } Explanation e = w.explain(reader, doc); if (!c.isProhibited()) maxCoord++; if (e.isMatch()) { @@ -244,7 +247,7 @@ sumExpl.addDetail(r); fail = true; } - if (c.getOccur().equals(Occur.SHOULD)) + if (c.getOccur() == Occur.SHOULD) shouldMatchCount++; } else if (c.isRequired()) { Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); @@ -312,6 +315,16 @@ return new BooleanScorer(similarity, minNrShouldMatch, optional, prohibited); } + if (required.size() == 0 && optional.size() == 0) { + // no required and optional clauses. + return null; + } else if (optional.size() < minNrShouldMatch) { + // either >1 req scorer, or there are 0 req scorers and at least 1 + // optional scorer. Therefore if there are not enough optional scorers + // no documents will be matched by the query + return null; + } + // Return a BooleanScorer2 return new BooleanScorer2(similarity, minNrShouldMatch, required, prohibited, optional); } Index: src/java/org/apache/lucene/search/PhraseQuery.java =================================================================== --- src/java/org/apache/lucene/search/PhraseQuery.java (revision 797728) +++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy) @@ -209,7 +209,11 @@ fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+ "), product of:"); - Explanation tfExpl = scorer(reader, true, false).explain(doc); + Scorer scorer = scorer(reader, true, false); + if (scorer == null) { + return new Explanation(0.0f, "no matching docs"); + } + Explanation tfExpl = scorer.explain(doc); fieldExpl.addDetail(tfExpl); fieldExpl.addDetail(idfExpl); Index: src/java/org/apache/lucene/search/DocIdSet.java =================================================================== --- src/java/org/apache/lucene/search/DocIdSet.java (revision 797728) +++ src/java/org/apache/lucene/search/DocIdSet.java (working copy) @@ -30,6 +30,8 @@ * implemented using a {@link SortedVIntList}). */ public static final DocIdSet EMPTY_DOCIDSET = new SortedVIntList(new int[0]); - /** Provides a {@link DocIdSetIterator} to access the set. */ + /** Provides a {@link DocIdSetIterator} to access the set. + * This may (but is not required to) return null if there + * are no docs that match. */ public abstract DocIdSetIterator iterator() throws IOException; } Index: src/java/org/apache/lucene/search/NonMatchingScorer.java =================================================================== --- src/java/org/apache/lucene/search/NonMatchingScorer.java (revision 797728) +++ src/java/org/apache/lucene/search/NonMatchingScorer.java (working copy) @@ -1,50 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -/** A scorer that matches no document at all. */ -class NonMatchingScorer extends Scorer { - public NonMatchingScorer() { super(null); } // no similarity used - - /** @deprecated use {@link #docID()} instead. */ - public int doc() { throw new UnsupportedOperationException(); } - - public int docID() { return NO_MORE_DOCS; } - - /** @deprecated use {@link #nextDoc()} instead. */ - public boolean next() throws IOException { return false; } - - public int nextDoc() throws IOException { return NO_MORE_DOCS; } - - public float score() { throw new UnsupportedOperationException(); } - - /** @deprecated use {@link #advance(int)} instead. */ - public boolean skipTo(int target) { return false; } - - public int advance(int target) { return NO_MORE_DOCS; } - - public Explanation explain(int doc) { - Explanation e = new Explanation(); - e.setDescription("No document matches."); - return e; - } -} - - Index: src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- src/java/org/apache/lucene/search/IndexSearcher.java (revision 797728) +++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -267,8 +267,17 @@ assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; // CHECKME: use ConjunctionScorer here? - DocIdSetIterator filterIter = filter.getDocIdSet(reader).iterator(); + DocIdSet filterDocIdSet = filter.getDocIdSet(reader); + if (filterDocIdSet == null) { + // this means the filter does not accept any documents. + return; + } + DocIdSetIterator filterIter = filterDocIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. + return; + } int filterDoc = filterIter.nextDoc(); int scorerDoc = scorer.advance(filterDoc); Index: src/java/org/apache/lucene/index/DocumentsWriter.java =================================================================== --- src/java/org/apache/lucene/index/DocumentsWriter.java (revision 797728) +++ src/java/org/apache/lucene/index/DocumentsWriter.java (working copy) @@ -1007,12 +1007,14 @@ int limit = ((Integer) entry.getValue()).intValue(); QueryWeight weight = query.queryWeight(searcher); Scorer scorer = weight.scorer(reader, true, false); - while(true) { - int doc = scorer.nextDoc(); - if (((long) docIDStart) + doc >= limit) - break; - reader.deleteDocument(doc); - any = true; + if (scorer != null) { + while(true) { + int doc = scorer.nextDoc(); + if (((long) docIDStart) + doc >= limit) + break; + reader.deleteDocument(doc); + any = true; + } } } searcher.close(); Index: contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java =================================================================== --- contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java (revision 797728) +++ contrib/miscellaneous/src/java/org/apache/lucene/misc/ChainedFilter.java (working copy) @@ -60,6 +60,7 @@ import java.io.IOException; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.EmptyDocIdSetIterator; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSetDISI; import org.apache.lucene.util.SortedVIntList; @@ -146,9 +147,18 @@ } private DocIdSetIterator getDISI(Filter filter, IndexReader reader) - throws IOException - { - return filter.getDocIdSet(reader).iterator(); + throws IOException { + DocIdSet docIdSet = filter.getDocIdSet(reader); + if (docIdSet == null) { + return EmptyDocIdSetIterator.getInstance(); + } else { + DocIdSetIterator iter = docIdSet.iterator(); + if (iter == null) { + return EmptyDocIdSetIterator.getInstance(); + } else { + return iter; + } + } } private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index) @@ -241,13 +251,11 @@ } private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis) - throws IOException - { + throws IOException { if (dis instanceof OpenBitSet) { // optimized case for OpenBitSets - switch (logic) - { + switch (logic) { case OR: result.or((OpenBitSet) dis); break; @@ -265,9 +273,17 @@ break; } } else { - DocIdSetIterator disi = dis.iterator(); - switch (logic) - { + DocIdSetIterator disi; + if (dis == null) { + disi = EmptyDocIdSetIterator.getInstance(); + } else { + disi = dis.iterator(); + if (disi == null) { + disi = EmptyDocIdSetIterator.getInstance(); + } + } + + switch (logic) { case OR: result.inPlaceOr(disi); break;