Index: lucene/src/java/org/apache/lucene/util/FixedBitSet.java =================================================================== --- lucene/src/java/org/apache/lucene/util/FixedBitSet.java (revision 1174017) +++ lucene/src/java/org/apache/lucene/util/FixedBitSet.java (working copy) @@ -183,21 +183,93 @@ /** Does in-place OR of the bits provided by the * iterator. */ public void or(DocIdSetIterator iter) throws IOException { - int doc; - while ((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - set(doc); + if (iter instanceof OpenBitSetIterator && iter.docID() == -1) { + final OpenBitSetIterator obs = (OpenBitSetIterator) iter; + or(obs.arr, obs.words); + } else { + int doc; + while ((doc = iter.nextDoc()) < numBits) { + set(doc); + } } } + /** this = this OR other */ public void or(FixedBitSet other) { - long[] thisArr = this.bits; - long[] otherArr = other.bits; - int pos = Math.min(thisArr.length, otherArr.length); + or(other.bits, other.bits.length); + } + + private void or(final long[] otherArr, final int otherLen) { + final long[] thisArr = this.bits; + int pos = Math.min(thisArr.length, otherLen); while (--pos >= 0) { thisArr[pos] |= otherArr[pos]; } } + /** Does in-place AND of the bits provided by the + * iterator. */ + public void and(DocIdSetIterator iter) throws IOException { + if (iter instanceof OpenBitSetIterator && iter.docID() == -1) { + final OpenBitSetIterator obs = (OpenBitSetIterator) iter; + and(obs.arr, obs.words); + } else { + if (numBits == 0) return; + int disiDoc, bitSetDoc = nextSetBit(0); + while (bitSetDoc != -1 && (disiDoc = iter.advance(bitSetDoc)) < numBits) { + clear(bitSetDoc, disiDoc); + disiDoc++; + bitSetDoc = (disiDoc < numBits) ? nextSetBit(disiDoc) : -1; + } + if (bitSetDoc != -1) { + clear(bitSetDoc, numBits); + } + } + } + + /** this = this AND other */ + public void and(FixedBitSet other) { + and(other.bits, other.bits.length); + } + + private void and(final long[] otherArr, final int otherLen) { + final long[] thisArr = this.bits; + int pos = Math.min(thisArr.length, otherLen); + while(--pos >= 0) { + thisArr[pos] &= otherArr[pos]; + } + if (thisArr.length > otherLen) { + Arrays.fill(thisArr, otherLen, thisArr.length, 0L); + } + } + + /** Does in-place AND NOT of the bits provided by the + * iterator. */ + public void andNot(DocIdSetIterator iter) throws IOException { + if (iter instanceof OpenBitSetIterator && iter.docID() == -1) { + final OpenBitSetIterator obs = (OpenBitSetIterator) iter; + andNot(obs.arr, obs.words); + } else { + int doc; + while ((doc = iter.nextDoc()) < numBits) { + clear(doc); + } + } + } + + /** this = this AND NOT other */ + public void andNot(FixedBitSet other) { + andNot(other.bits, other.bits.length); + } + + private void andNot(final long[] otherArr, final int otherLen) { + final long[] thisArr = this.bits; + int pos = Math.min(thisArr.length, otherLen); + while(--pos >= 0) { + thisArr[pos] &= ~otherArr[pos]; + } + } + // NOTE: no .isEmpty() here because that's trappy (ie, // typically isEmpty is low cost, but this one wouldn't // be) Index: lucene/src/java/org/apache/lucene/util/OpenBitSetIterator.java =================================================================== --- lucene/src/java/org/apache/lucene/util/OpenBitSetIterator.java (revision 1174017) +++ lucene/src/java/org/apache/lucene/util/OpenBitSetIterator.java (working copy) @@ -79,8 +79,8 @@ // for efficiency, or have a common root interface? (or // maybe both? could ask for a SetBitsIterator, etc... - private final long[] arr; - private final int words; + final long[] arr; + final int words; private int i=-1; private long word; private int wordShift; Index: modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (revision 1174017) +++ modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (working copy) @@ -27,8 +27,7 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; -import org.apache.lucene.util.OpenBitSet; -import org.apache.lucene.util.OpenBitSetDISI; +import org.apache.lucene.util.FixedBitSet; /** * A container Filter that allows Boolean composition of Filters. @@ -39,7 +38,6 @@ * The resulting Filter is NOT'd with the NOT Filters * The resulting Filter is AND'd with the MUST Filters */ - public class BooleanFilter extends Filter { List shouldFilters = null; @@ -52,53 +50,43 @@ */ @Override public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - OpenBitSetDISI res = null; + FixedBitSet res = null; final IndexReader reader = context.reader; if (shouldFilters != null) { for (int i = 0; i < shouldFilters.size(); i++) { + final DocIdSetIterator disi = getDISI(shouldFilters, i, context); + if (disi == null) continue; if (res == null) { - res = new OpenBitSetDISI(getDISI(shouldFilters, i, context), reader.maxDoc()); - } else { - DocIdSet dis = shouldFilters.get(i).getDocIdSet(context); - if(dis instanceof OpenBitSet) { - // optimized case for OpenBitSets - res.or((OpenBitSet) dis); - } else { - res.inPlaceOr(getDISI(shouldFilters, i, context)); - } + res = new FixedBitSet(reader.maxDoc()); } + res.or(disi); } } if (notFilters != null) { for (int i = 0; i < notFilters.size(); i++) { if (res == null) { - res = new OpenBitSetDISI(getDISI(notFilters, i, context), reader.maxDoc()); - res.flip(0, reader.maxDoc()); // NOTE: may set bits on deleted docs - } else { - DocIdSet dis = notFilters.get(i).getDocIdSet(context); - if(dis instanceof OpenBitSet) { - // optimized case for OpenBitSets - res.andNot((OpenBitSet) dis); - } else { - res.inPlaceNot(getDISI(notFilters, i, context)); - } + res = new FixedBitSet(reader.maxDoc()); + res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs } + final DocIdSetIterator disi = getDISI(notFilters, i, context); + if (disi != null) { + res.andNot(disi); + } } } if (mustFilters != null) { for (int i = 0; i < mustFilters.size(); i++) { + final DocIdSetIterator disi = getDISI(mustFilters, i, context); + if (disi == null) { + return DocIdSet.EMPTY_DOCIDSET; // no documents can match + } if (res == null) { - res = new OpenBitSetDISI(getDISI(mustFilters, i, context), reader.maxDoc()); + res = new FixedBitSet(reader.maxDoc()); + res.or(disi); } else { - DocIdSet dis = mustFilters.get(i).getDocIdSet(context); - if(dis instanceof OpenBitSet) { - // optimized case for OpenBitSets - res.and((OpenBitSet) dis); - } else { - res.inPlaceAnd(getDISI(mustFilters, i, context)); - } + res.and(disi); } } } @@ -131,7 +119,8 @@ private DocIdSetIterator getDISI(List filters, int index, AtomicReaderContext context) throws IOException { - return filters.get(index).getDocIdSet(context).iterator(); + final DocIdSet set = filters.get(index).getDocIdSet(context); + return (set == null) ? null : set.iterator(); } @Override Index: modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java =================================================================== --- modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (revision 1174017) +++ modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (working copy) @@ -30,6 +30,10 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.search.TermRangeFilter; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -82,9 +86,42 @@ return tf; } + + private Filter getWrappedTermQuery(String field, String text) { + return new QueryWrapperFilter(new TermQuery(new Term(field, text))); + } + + private Filter getNullDISFilter() { + return new Filter() { + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context) { + return null; + } + }; + } + private Filter getNullDISIFilter() { + return new Filter() { + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context) { + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() { + return null; + } + + @Override + public boolean isCacheable() { + return true; + } + }; + } + }; + } + private void tstFilterCard(String mes, int expected, Filter filt) throws Throwable { + // BooleanFilter never returns null DIS or null DISI! DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader)).iterator(); int actual = 0; while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { @@ -98,6 +135,11 @@ BooleanFilter booleanFilter = new BooleanFilter(); booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.SHOULD)); tstFilterCard("Should retrieves only 1 doc", 1, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getWrappedTermQuery("price", "030"), BooleanClause.Occur.SHOULD)); + tstFilterCard("Should retrieves only 1 doc", 1, booleanFilter); } public void testShoulds() throws Throwable { @@ -116,6 +158,16 @@ booleanFilter.add(new FilterClause(getTermsFilter("inStock", "Maybe"), BooleanClause.Occur.MUST_NOT)); tstFilterCard("Shoulds Ored but AndNots", 3, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getRangeFilter("price", "010", "020"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getRangeFilter("price", "020", "030"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getWrappedTermQuery("inStock", "N"), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("Shoulds Ored but AndNot", 4, booleanFilter); + + booleanFilter.add(new FilterClause(getWrappedTermQuery("inStock", "Maybe"), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("Shoulds Ored but AndNots", 3, booleanFilter); } public void testShouldsAndMust() throws Throwable { @@ -124,6 +176,13 @@ booleanFilter.add(new FilterClause(getRangeFilter("price", "020", "030"), BooleanClause.Occur.SHOULD)); booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"), BooleanClause.Occur.MUST)); tstFilterCard("Shoulds Ored but MUST", 3, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getRangeFilter("price", "010", "020"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getRangeFilter("price", "020", "030"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getWrappedTermQuery("accessRights", "admin"), BooleanClause.Occur.MUST)); + tstFilterCard("Shoulds Ored but MUST", 3, booleanFilter); } public void testShouldsAndMusts() throws Throwable { @@ -142,18 +201,36 @@ booleanFilter.add(new FilterClause(getRangeFilter("date", "20050101", "20051231"), BooleanClause.Occur.MUST)); booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"), BooleanClause.Occur.MUST_NOT)); tstFilterCard("Shoulds Ored but MUSTs ANDED and MustNot", 0, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getRangeFilter("price", "030", "040"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getWrappedTermQuery("accessRights", "admin"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getRangeFilter("date", "20050101", "20051231"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getWrappedTermQuery("inStock", "N"), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("Shoulds Ored but MUSTs ANDED and MustNot", 0, booleanFilter); } public void testJustMust() throws Throwable { BooleanFilter booleanFilter = new BooleanFilter(); booleanFilter.add(new FilterClause(getTermsFilter("accessRights", "admin"), BooleanClause.Occur.MUST)); tstFilterCard("MUST", 3, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getWrappedTermQuery("accessRights", "admin"), BooleanClause.Occur.MUST)); + tstFilterCard("MUST", 3, booleanFilter); } public void testJustMustNot() throws Throwable { BooleanFilter booleanFilter = new BooleanFilter(); booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"), BooleanClause.Occur.MUST_NOT)); tstFilterCard("MUST_NOT", 4, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getWrappedTermQuery("inStock", "N"), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("MUST_NOT", 4, booleanFilter); } public void testMustAndMustNot() throws Throwable { @@ -161,5 +238,69 @@ booleanFilter.add(new FilterClause(getTermsFilter("inStock", "N"), BooleanClause.Occur.MUST)); booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.MUST_NOT)); tstFilterCard("MUST_NOT wins over MUST for same docs", 0, booleanFilter); + + // same with a real DISI (no OpenBitSetIterator) + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getWrappedTermQuery("inStock", "N"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getWrappedTermQuery("price", "030"), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("MUST_NOT wins over MUST for same docs", 0, booleanFilter); } + + public void testCombinedNullDocIdSets() throws Throwable { + BooleanFilter booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.MUST)); + tstFilterCard("A MUST filter that returns a null DIS should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.MUST)); + tstFilterCard("A MUST filter that returns a null DISI should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.SHOULD)); + tstFilterCard("A SHOULD filter that returns a null DIS should be invisible", 1, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.SHOULD)); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.SHOULD)); + tstFilterCard("A SHOULD filter that returns a null DISI should be invisible", 1, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("A MUST_NOT filter that returns a null DIS should be invisible", 1, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getTermsFilter("price", "030"), BooleanClause.Occur.MUST)); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("A MUST_NOT filter that returns a null DISI should be invisible", 1, booleanFilter); + } + + public void testJustNullDocIdSets() throws Throwable { + BooleanFilter booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.MUST)); + tstFilterCard("A MUST filter that returns a null DIS should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.MUST)); + tstFilterCard("A MUST filter that returns a null DISI should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.SHOULD)); + tstFilterCard("A single SHOULD filter that returns a null DIS should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.SHOULD)); + tstFilterCard("A single SHOULD filter that returns a null DISI should never return documents", 0, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISFilter(), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("A single MUST_NOT filter that returns a null DIS should be invisible", 5, booleanFilter); + + booleanFilter = new BooleanFilter(); + booleanFilter.add(new FilterClause(getNullDISIFilter(), BooleanClause.Occur.MUST_NOT)); + tstFilterCard("A single MUST_NOT filter that returns a null DIS should be invisible", 5, booleanFilter); + } }