Index: lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 1179035) +++ lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy) @@ -192,6 +192,8 @@ docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + + // ignore deletes ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -204,13 +206,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); Index: lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java =================================================================== --- lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (revision 1179035) +++ lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (working copy) @@ -160,7 +160,7 @@ return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) { @Override - boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { + public boolean get(int doc) throws ArrayIndexOutOfBoundsException { return termSet.get(fcsi.getOrd(doc)); } }; Index: 
lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (revision 1179035) +++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (working copy) @@ -130,5 +130,32 @@ if (directory != null) directory.close(); } } + + public void testConstantScoreQueryAndFilter() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, d); + Document doc = new Document(); + doc.add(newField("field", "a", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + doc = new Document(); + doc.add(newField("field", "b", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))), CachingWrapperFilter.DeletesMode.RECACHE); + Query query = new ConstantScoreQuery(filterB); + + IndexSearcher s = new IndexSearcher(r); + assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b + + Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), CachingWrapperFilter.DeletesMode.RECACHE); + query = new ConstantScoreQuery(filterA); + + assertEquals(0, s.search(query, filterB, 1).totalHits); // Query for field:a, Filter field:b + + r.close(); + d.close(); + } } Index: lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java (revision 1179035) +++ lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java (working copy) @@ -23,9 +23,12 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import 
org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; @@ -116,4 +119,56 @@ } } + /** when a filter is executed random access, make sure + * we get BucketScorer + */ + public void testBS1WithRandomAccessFilter() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir); + Document d = new Document(); + d.add(newField("foo", "bar", TextField.TYPE_STORED)); + iw.addDocument(d); + d = new Document(); + d.add(newField("foo", "baz", TextField.TYPE_STORED)); + iw.addDocument(d); + + IndexReader ir = iw.getReader(); + iw.close(); + + IndexSearcher is = newSearcher(ir); + + // force the filter to be executed as bits + is.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + + // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our collector accepts out-of-order collection! 
+ final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer"; + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD); + is.search(bq, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("foo", "bar")))), + new Collector() { + + @Override + public void setScorer(Scorer scorer) throws IOException { + assertEquals(bucketScorerClass, scorer.getClass().getName()); + } + + @Override + public void collect(int doc) throws IOException { + } + + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + } + ); + is.close(); + ir.close(); + dir.close(); + } } Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1179035) +++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (working copy) @@ -87,13 +87,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs: docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- 
lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1179035) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -128,15 +128,26 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, - boolean topScorer, Bits acceptDocs) throws IOException { + boolean topScorer, final Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; final DocIdSet dis = filter.getDocIdSet(context); if (dis == null) { return null; + } else if (acceptDocs != null) { + // Must dynamically AND in the incoming + // acceptDocs since we have no way to ask + // the filter to do so + disi = new FilteredDocIdSetIterator(dis.iterator()) { + @Override + protected boolean match(int doc) throws IOException { + return acceptDocs.get(doc); + } + }; + } else { + disi = dis.iterator(); } - disi = dis.iterator(); } else { assert query != null && innerWeight != null; disi = innerWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs); Index: lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (revision 1179035) +++ lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (working copy) @@ -83,6 +83,7 @@ missCount++; result = filter.bitSpans(context); + result.getDocIdSet().setContainsOnlyLiveDocs(cache.deletesMode == CachingWrapperFilter.DeletesMode.RECACHE); cache.put(coreKey, delCoreKey, result); return result; Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 1179035) +++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -126,7 +126,7 @@ // because deleted docs have an order of 0 (null entry 
in StringIndex) return new FieldCacheDocIdSet(context.reader, true) { @Override - final boolean matchDoc(int doc) { + public final boolean get(int doc) { final int docOrd = fcsi.getOrd(doc); return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; } @@ -178,7 +178,7 @@ // we only respect deleted docs if the range contains 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -229,7 +229,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -280,7 +280,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -331,7 +331,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -386,7 +386,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -441,7 +441,7 @@ // ignore deleted docs if range 
doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -518,7 +518,7 @@ * this method checks, if a doc is a hit, should throw AIOBE, when position * invalid */ - abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException; + abstract boolean get(int doc) throws ArrayIndexOutOfBoundsException; /** * this DocIdSet is always cacheable (does not go back @@ -548,7 +548,7 @@ try { do { doc++; - } while (!matchDoc(doc)); + } while (!get(doc)); return doc; } catch (ArrayIndexOutOfBoundsException e) { return doc = NO_MORE_DOCS; @@ -559,7 +559,7 @@ public int advance(int target) { try { doc = target; - while (!matchDoc(doc)) { + while (!get(doc)) { doc++; } return doc; @@ -589,14 +589,14 @@ if (doc >= maxDoc) { return doc = NO_MORE_DOCS; } - } while (!liveDocs.get(doc) || !matchDoc(doc)); + } while (!liveDocs.get(doc) || !get(doc)); return doc; } @Override public int advance(int target) { for(doc=target;doc cache; - private final DeletesMode deletesMode; + final DeletesMode deletesMode; public FilterCache(DeletesMode deletesMode) { this.deletesMode = deletesMode; @@ -217,6 +217,7 @@ docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader); if (docIdSet != null) { + docIdSet.setContainsOnlyLiveDocs(cache.deletesMode != DeletesMode.IGNORE); cache.put(coreKey, delCoreKey, docIdSet); } Index: lucene/src/java/org/apache/lucene/search/DocIdSet.java =================================================================== --- lucene/src/java/org/apache/lucene/search/DocIdSet.java (revision 1179035) +++ lucene/src/java/org/apache/lucene/search/DocIdSet.java (working copy) @@ -25,6 +25,10 @@ */ public abstract class DocIdSet { + // If true, this Filter includes only live documents (ie + // deleted documents have already been filtered out): + 
private boolean containsOnlyLiveDocs; + /** An empty {@code DocIdSet} instance for easy use, e.g. in Filters that hit no documents. */ public static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() { @@ -64,4 +68,27 @@ public boolean isCacheable() { return false; } + + /** + * Returns whether this DocIdSet contains only live documents (deleted documents + * have already been filtered out) + * + * @return {@code true} if the DocIdSet contains only live documents, + * {@code false} otherwise + * @lucene.experimental + */ + public boolean containsOnlyLiveDocs() { + return containsOnlyLiveDocs; + } + + /** + * Allows this DocIdSet to be marked as containing only live documents + * (documents that have not been deleted). + * + * @param containsOnlyLiveDocs Whether this DocIdSet contains only live documents + * @lucene.experimental + */ + public void setContainsOnlyLiveDocs(boolean containsOnlyLiveDocs) { + this.containsOnlyLiveDocs = containsOnlyLiveDocs; + } } Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1179035) +++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -41,6 +41,8 @@ import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; // javadoc +import org.apache.lucene.util.AndBits; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ThreadInterruptedException; @@ -589,16 +591,7 @@ final Filter filter, final Collector collector) throws IOException { assert filter != null; - - // we are gonna advance() this scorer, so we set inorder=true/toplevel=false - Scorer scorer = weight.scorer(context, true, false, context.reader.getLiveDocs()); - if (scorer == null) { - return; - } - int docID = scorer.docID(); - assert docID == -1 || 
docID == DocIdSetIterator.NO_MORE_DOCS; - // CHECKME: use ConjunctionScorer here? DocIdSet filterDocIdSet = filter.getDocIdSet(context); if (filterDocIdSet == null) { @@ -611,27 +604,92 @@ // this means the filter does not accept any documents. return; } - int filterDoc = filterIter.nextDoc(); - int scorerDoc = scorer.advance(filterDoc); - - collector.setScorer(scorer); - while (true) { - if (scorerDoc == filterDoc) { - // Check if scorer has exhausted, only before collecting. - if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - collector.collect(scorerDoc); - filterDoc = filterIter.nextDoc(); - scorerDoc = scorer.advance(filterDoc); - } else if (scorerDoc > filterDoc) { - filterDoc = filterIter.advance(scorerDoc); + + boolean filterContainsLiveDocs = filterDocIdSet.containsOnlyLiveDocs(); + // if our filter already filters-out deleted docs, then we don't need to + // worry about the reader's liveDocs set. + Bits acceptDocs = filterContainsLiveDocs ? null : context.reader.getLiveDocs(); + + int firstFilterDoc = filterIter.nextDoc(); + + boolean useRandomAccess = false; + + if (filterDocIdSet instanceof Bits && firstFilterDoc < filterRandomAccessThreshold) { + useRandomAccess = true; + Bits randomAccessBits = (Bits) filterDocIdSet; + // Filter by random-access: we push the filter all the way down to the atomic + // scorers, so the bits are applied just like deleted docs + if (acceptDocs != null) { + // TODO: we could swap order of these two if we knew + // which was more restrictive? + acceptDocs = new AndBits(randomAccessBits, acceptDocs); } else { - scorerDoc = scorer.advance(filterDoc); + acceptDocs = randomAccessBits; } } + + // if we are using random access, we collect normally, otherwise, + // we are gonna advance() this scorer, so we set inorder=true/toplevel=false + Scorer scorer = useRandomAccess + ? 
weight.scorer(context, !collector.acceptsDocsOutOfOrder(), true, acceptDocs) + : weight.scorer(context, true, false, acceptDocs); + if (scorer == null) { + return; + } + + if (useRandomAccess) { + collector.setNextReader(context); + scorer.score(collector); + } else { + int docID = scorer.docID(); + assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; + int filterDoc = firstFilterDoc; + int scorerDoc = scorer.advance(filterDoc); + collector.setScorer(scorer); + while (true) { + if (scorerDoc == filterDoc) { + // Check if scorer has exhausted, only before collecting. + if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + collector.collect(scorerDoc); + filterDoc = filterIter.nextDoc(); + scorerDoc = scorer.advance(filterDoc); + } else if (scorerDoc > filterDoc) { + filterDoc = filterIter.advance(scorerDoc); + } else { + scorerDoc = scorer.advance(filterDoc); + } + } + } } + private int filterRandomAccessThreshold = 100; + + /** + * Expert: + * + * @return Threshold used in the heuristics to determine if a Filter is dense or sparse. + * @see #setFilterRandomAccessThreshold(int) + * @lucene.experimental + */ + public int getFilterRandomAccessThreshold() { + return filterRandomAccessThreshold; + } + + /** + * Expert: Sets the threshold used in the heuristics to determine if a + * Filter is dense or sparse (and therefore whether random-access should + * be used or not). If a document is found in the Filter beneath this + * threshold, it is assumed to be dense. + * + * @param value Threshold to be used in this IndexSearcher + * @lucene.experimental + */ + public void setFilterRandomAccessThreshold(int value) { + filterRandomAccessThreshold = value; + } + /** Expert: called to re-write queries into primitive queries. 
* @throws BooleanQuery.TooManyClauses */ Index: lucene/src/java/org/apache/lucene/util/AndBits.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AndBits.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/AndBits.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @lucene.experimental + */ +public class AndBits implements Bits { + + private final Bits bits1; + private final Bits bits2; + + public AndBits(Bits bits1, Bits bits2) { + this.bits1 = bits1; + this.bits2 = bits2; + } + + @Override + public boolean get(int index) { + return bits1.get(index) && bits2.get(index); + } + + @Override + public int length() { + return Math.min(bits1.length(), bits2.length()); + } +} Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1179035) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -1289,6 +1289,7 @@ } IndexSearcher ret = random.nextBoolean() ? 
new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext()); ret.setSimilarityProvider(similarityProvider); + ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200)); return ret; } else { int threads = 0; @@ -1313,6 +1314,7 @@ } }; ret.setSimilarityProvider(similarityProvider); + ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200)); return ret; } }