Index: modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java =================================================================== --- modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (revision 1178176) +++ modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (working copy) @@ -171,7 +171,7 @@ return null; } if (!(parents instanceof FixedBitSet)) { - throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents); + throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents + "filter= " + parentsFilter); } return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode); Index: lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 1178176) +++ lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy) @@ -192,6 +192,8 @@ docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + + // ignore deletes ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -204,13 +206,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: 
filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); Index: lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java =================================================================== --- lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (revision 1178176) +++ lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (working copy) @@ -160,7 +160,7 @@ return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) { @Override - boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { + public boolean get(int doc) throws ArrayIndexOutOfBoundsException { return termSet.get(fcsi.getOrd(doc)); } }; Index: lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (revision 1178176) +++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (working copy) @@ -130,5 +130,32 @@ if (directory != null) directory.close(); } } + + public void testConstantScoreQueryAndFilter() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, d); + Document doc = new Document(); + doc.add(newField("field", "a", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + doc = new Document(); + doc.add(newField("field", "b", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))), CachingWrapperFilter.DeletesMode.RECACHE); + Query query = new ConstantScoreQuery(filterB); + + IndexSearcher s = new IndexSearcher(r); + assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b + + Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), 
CachingWrapperFilter.DeletesMode.RECACHE); + query = new ConstantScoreQuery(filterA); + + assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:a, Filter field:b + + r.close(); + d.close(); + } } Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1178176) +++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (working copy) @@ -87,13 +87,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs: docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -128,15 +128,26 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, - boolean topScorer, Bits acceptDocs) throws IOException { + boolean topScorer, final Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; final DocIdSet dis = filter.getDocIdSet(context); if (dis == null) { return null; + } else if (acceptDocs != null) { + // Must dynamically AND in the incoming + // 
acceptOnlyDocs since we have no way to ask + // the filter to do so + disi = new FilteredDocIdSetIterator(dis.iterator()) { + @Override + protected boolean match(int doc) throws IOException { + return acceptDocs.get(doc); + } + }; + } else { + disi = dis.iterator(); } - disi = dis.iterator(); } else { assert query != null && innerWeight != null; disi = innerWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs); Index: lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (working copy) @@ -83,6 +83,7 @@ missCount++; result = filter.bitSpans(context); + result.getDocIdSet().setContainsOnlyLiveDocs(cache.deletesMode == CachingWrapperFilter.DeletesMode.RECACHE); cache.put(coreKey, delCoreKey, result); return result; Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -126,7 +126,7 @@ // because deleted docs have an order of 0 (null entry in StringIndex) return new FieldCacheDocIdSet(context.reader, true) { @Override - final boolean matchDoc(int doc) { + public final boolean get(int doc) { final int docOrd = fcsi.getOrd(doc); return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint; } @@ -178,7 +178,7 @@ // we only respect deleted docs if the range contains 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -229,7 +229,7 @@ // ignore 
deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -280,7 +280,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -331,7 +331,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -386,7 +386,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -441,7 +441,7 @@ // ignore deleted docs if range doesn't contain 0 return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) { @Override - boolean matchDoc(int doc) { + public boolean get(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; } }; @@ -518,7 +518,7 @@ * this method checks, if a doc is a hit, should throw AIOBE, when position * invalid */ - abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException; + abstract boolean get(int doc) throws ArrayIndexOutOfBoundsException; /** * this DocIdSet is always cacheable (does not go back @@ -548,7 +548,7 @@ try { do { doc++; - 
} while (!matchDoc(doc)); + } while (!get(doc)); return doc; } catch (ArrayIndexOutOfBoundsException e) { return doc = NO_MORE_DOCS; @@ -559,7 +559,7 @@ public int advance(int target) { try { doc = target; - while (!matchDoc(doc)) { + while (!get(doc)) { doc++; } return doc; @@ -589,14 +589,14 @@ if (doc >= maxDoc) { return doc = NO_MORE_DOCS; } - } while (!liveDocs.get(doc) || !matchDoc(doc)); + } while (!liveDocs.get(doc) || !get(doc)); return doc; } @Override public int advance(int target) { for(doc=target;doc 1) { - postingsEnum = new UnionDocsAndPositionsEnum(reader, terms); + postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms); // coarse -- this overcounts since a given doc can // have more than one terms: @@ -435,9 +435,8 @@ private DocsQueue _queue; private IntQueue _posList; - public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException { + public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException { List docsEnums = new LinkedList(); - final Bits liveDocs = indexReader.getLiveDocs(); for (int i = 0; i < terms.length; i++) { DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs, terms[i].field(), Index: lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy) @@ -76,7 +76,7 @@ // after de-serialize transient Map cache; - private final DeletesMode deletesMode; + final DeletesMode deletesMode; public FilterCache(DeletesMode deletesMode) { this.deletesMode = deletesMode; @@ -217,6 +217,7 @@ docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader); if (docIdSet != null) { + docIdSet.setContainsOnlyLiveDocs(cache.deletesMode != DeletesMode.IGNORE); cache.put(coreKey, delCoreKey, docIdSet); } 
Index: lucene/src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -76,13 +76,15 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + final String field = term.field(); + final IndexReader reader = context.reader; assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); - final TermsEnum termsEnum = getTermsEnum(context); - if (termsEnum == null) { + final TermState state = termStates.get(context.ord); + if (state == null) { + assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader"; return null; } - // TODO should we reuse the DocsEnum here? 
- final DocsEnum docs = termsEnum.docs(acceptDocs, null); + final DocsEnum docs = reader.termDocsEnum(acceptDocs, field, term.bytes(), state); assert docs != null; return new TermScorer(this, docs, createDocScorer(context)); } @@ -112,7 +114,6 @@ private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { // only called from assert - //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); final Terms terms = reader.terms(field); return terms == null || terms.docFreq(bytes) == 0; } Index: lucene/src/java/org/apache/lucene/search/DocIdSet.java =================================================================== --- lucene/src/java/org/apache/lucene/search/DocIdSet.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/DocIdSet.java (working copy) @@ -25,6 +25,10 @@ */ public abstract class DocIdSet { + // If true, this Filter includes only live documents (ie + // deleted documents have already been filtered out): + protected boolean containsOnlyLiveDocs; + /** An empty {@code DocIdSet} instance for easy use, e.g. in Filters that hit no documents. */ public static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() { @@ -64,4 +68,25 @@ public boolean isCacheable() { return false; } + + /** + * Returns whether this DocIdSet contains only live documents (deleted documents + * have already been filtered out) + * + * @return {@code true} if the DocIdSet contains only live documents, + * {@code false} otherwise + */ + public boolean isContainsOnlyLiveDocs() { + return containsOnlyLiveDocs; + } + + /** + * Allows this DocIdSet to be marked as containing only live documents + * (documents that have not been deleted). 
+ * + * @param containsOnlyLiveDocs Whether this DocIdSet contains only live documents + */ + public void setContainsOnlyLiveDocs(boolean containsOnlyLiveDocs) { + this.containsOnlyLiveDocs = containsOnlyLiveDocs; + } } Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1178176) +++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -41,6 +41,8 @@ import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; // javadoc +import org.apache.lucene.util.AndBits; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ThreadInterruptedException; @@ -589,49 +591,81 @@ final Filter filter, final Collector collector) throws IOException { assert filter != null; - - // we are gonna advance() this scorer, so we set inorder=true/toplevel=false - Scorer scorer = weight.scorer(context, true, false, context.reader.getLiveDocs()); - if (scorer == null) { - return; - } - int docID = scorer.docID(); - assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; - // CHECKME: use ConjunctionScorer here? DocIdSet filterDocIdSet = filter.getDocIdSet(context); if (filterDocIdSet == null) { // this means the filter does not accept any documents. return; } - - DocIdSetIterator filterIter = filterDocIdSet.iterator(); - if (filterIter == null) { - // this means the filter does not accept any documents. 
+ Bits acceptsDocs = context.reader.getLiveDocs(); + boolean useRandomAccess = false; + + if (filterDocIdSet instanceof Bits && useRandomAccess((Bits) filterDocIdSet)) { + useRandomAccess = true; + Bits randomAccessBits = (Bits) filterDocIdSet; + // Filter by random-access: we push the filter all the way down to the atomic + // scorers, so the bits are applied just like deleted docs + if (acceptsDocs != null && !filterDocIdSet.isContainsOnlyLiveDocs()) { + // TODO: we could swap order of these two if we knew + // which was more restrictive? + acceptsDocs = new AndBits(randomAccessBits, acceptsDocs); + } else { + acceptsDocs = randomAccessBits; + } + } + + // we are gonna advance() this scorer, so we set inorder=true/toplevel=false + Scorer scorer = weight.scorer(context, true, false, acceptsDocs); + if (scorer == null) { return; } - int filterDoc = filterIter.nextDoc(); - int scorerDoc = scorer.advance(filterDoc); - - collector.setScorer(scorer); - while (true) { - if (scorerDoc == filterDoc) { - // Check if scorer has exhausted, only before collecting. - if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { - break; + + if (useRandomAccess) { + collector.setNextReader(context); + scorer.score(collector); + } else { + int docID = scorer.docID(); + assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; + DocIdSetIterator filterIter = filterDocIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. + return; + } + int filterDoc = filterIter.nextDoc(); + int scorerDoc = scorer.advance(filterDoc); + + collector.setScorer(scorer); + while (true) { + if (scorerDoc == filterDoc) { + // Check if scorer has exhausted, only before collecting. 
+ if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + collector.collect(scorerDoc); + filterDoc = filterIter.nextDoc(); + scorerDoc = scorer.advance(filterDoc); + } else if (scorerDoc > filterDoc) { + filterDoc = filterIter.advance(scorerDoc); + } else { + scorerDoc = scorer.advance(filterDoc); } - collector.collect(scorerDoc); - filterDoc = filterIter.nextDoc(); - scorerDoc = scorer.advance(filterDoc); - } else if (scorerDoc > filterDoc) { - filterDoc = filterIter.advance(scorerDoc); - } else { - scorerDoc = scorer.advance(filterDoc); } } } + /** + * Applies heuristics to determine if the given Bits should be used in + * random-access filtering. + * + * @param acceptDocs Bits to determine if they should be used in random- + * access filtering + * @return {@code true} if the Bits should be used in random-access filtering + */ + protected boolean useRandomAccess(Bits acceptDocs) { + return true; + } + /** Expert: called to re-write queries into primitive queries. * @throws BooleanQuery.TooManyClauses */ Index: lucene/src/java/org/apache/lucene/util/AndBits.java =================================================================== --- lucene/src/java/org/apache/lucene/util/AndBits.java (revision 0) +++ lucene/src/java/org/apache/lucene/util/AndBits.java (revision 0) @@ -0,0 +1,42 @@ +package org.apache.lucene.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @lucene.experimental + */ +public class AndBits implements Bits { + + private final Bits bits1; + private final Bits bits2; + + public AndBits(Bits bits1, Bits bits2) { + this.bits1 = bits1; + this.bits2 = bits2; + } + + @Override + public boolean get(int index) { + return bits1.get(index) && bits2.get(index); + } + + @Override + public int length() { + return Math.min(bits1.length(), bits2.length()); + } +} Index: lucene/src/test-framework/org/apache/lucene/search/CheckHits.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/search/CheckHits.java (revision 1178176) +++ lucene/src/test-framework/org/apache/lucene/search/CheckHits.java (working copy) @@ -104,7 +104,7 @@ IndexSearcher s = QueryUtils.wrapUnderlyingReader (random, searcher, i); s.search(query, c); - Assert.assertEquals("Wrap Reader " + i + ": " + + Assert.assertEquals("Wrap Reader " + i + ": s=" + s + "query=" + query.toString(defaultFieldName), correct, actual); FieldCache.DEFAULT.purge(s.getIndexReader()); // our wrapping can create insanity otherwise