Index: src/java/org/apache/lucene/search/CachingSpanFilter.java
===================================================================
--- src/java/org/apache/lucene/search/CachingSpanFilter.java (revision 1179349)
+++ src/java/org/apache/lucene/search/CachingSpanFilter.java (working copy)
@@ -83,6 +83,7 @@
 
     missCount++;
     result = filter.bitSpans(context);
+    result.getDocIdSet().setContainsOnlyLiveDocs(cache.deletesMode == CachingWrapperFilter.DeletesMode.RECACHE);
 
     cache.put(coreKey, delCoreKey, result);
     return result;
Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 1179349)
+++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy)
@@ -76,7 +76,7 @@
     // after de-serialize
     transient Map<Object,Map<Object,T>> cache;
 
-    private final DeletesMode deletesMode;
+    final DeletesMode deletesMode;
 
     public FilterCache(DeletesMode deletesMode) {
       this.deletesMode = deletesMode;
@@ -217,6 +217,7 @@
 
     docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
     if (docIdSet != null) {
+      docIdSet.setContainsOnlyLiveDocs(cache.deletesMode != DeletesMode.IGNORE);
       cache.put(coreKey, delCoreKey, docIdSet);
     }
Index: src/java/org/apache/lucene/search/ConstantScoreQuery.java
===================================================================
--- src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1179349)
+++ src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy)
@@ -128,15 +128,26 @@
     @Override
     public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
-        boolean topScorer, Bits acceptDocs) throws IOException {
+        boolean topScorer, final Bits acceptDocs) throws IOException {
       final DocIdSetIterator disi;
       if (filter != null) {
         assert query == null;
         final DocIdSet dis = filter.getDocIdSet(context);
         if (dis == null) {
           return null;
+        } else if (acceptDocs != null) {
+          // Must dynamically AND in the incoming
+          // acceptDocs since we have no way to ask
+          // the filter to do so
+          disi = new FilteredDocIdSetIterator(dis.iterator()) {
+            @Override
+            protected boolean match(int doc) throws IOException {
+              return acceptDocs.get(doc);
+            }
+          };
+        } else {
+          disi = dis.iterator();
         }
-        disi = dis.iterator();
       } else {
         assert query != null && innerWeight != null;
         disi = innerWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
Index: src/java/org/apache/lucene/search/DocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/DocIdSet.java (revision 1179349)
+++ src/java/org/apache/lucene/search/DocIdSet.java (working copy)
@@ -25,6 +25,10 @@
  */
 public abstract class DocIdSet {
 
+  // If true, this Filter includes only live documents (i.e.
+  // deleted documents have already been filtered out):
+  private boolean containsOnlyLiveDocs;
+
   /** An empty {@code DocIdSet} instance for easy use, e.g. in Filters that
    *  hit no documents. */
   public static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
@@ -64,4 +68,27 @@
   public boolean isCacheable() {
     return false;
   }
+
+  /**
+   * Returns whether this DocIdSet contains only live documents (deleted
+   * documents have already been filtered out).
+   *
+   * @return {@code true} if the DocIdSet contains only live documents,
+   *         {@code false} otherwise
+   * @lucene.experimental
+   */
+  public boolean containsOnlyLiveDocs() {
+    return containsOnlyLiveDocs;
+  }
+
+  /**
+   * Allows this DocIdSet to be marked as containing only live documents
+   * (documents that have not been deleted).
+   *
+   * @param containsOnlyLiveDocs Whether this DocIdSet contains only live documents
+   * @lucene.experimental
+   */
+  public void setContainsOnlyLiveDocs(boolean containsOnlyLiveDocs) {
+    this.containsOnlyLiveDocs = containsOnlyLiveDocs;
+  }
 }
Index: src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
===================================================================
--- src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 1179349)
+++ src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy)
@@ -122,11 +122,9 @@
 
         assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
 
-        // for this DocIdSet, we can ignore deleted docs
-        // because deleted docs have an order of 0 (null entry in StringIndex)
-        return new FieldCacheDocIdSet(context.reader, true) {
+        return new FieldCacheDocIdSet(context.reader) {
          @Override
-          final boolean matchDoc(int doc) {
+          public final boolean get(int doc) {
            final int docOrd = fcsi.getOrd(doc);
            return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
          }
@@ -175,10 +173,9 @@
         return DocIdSet.EMPTY_DOCIDSET;
 
       final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser);
-      // we only respect deleted docs if the range contains 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -226,10 +223,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
 
      final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser);
-      // ignore deleted docs if range doesn't contain 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -277,10 +273,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
 
      final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser);
-      // ignore deleted docs if range doesn't contain 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -328,10 +323,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
 
      final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser);
-      // ignore deleted docs if range doesn't contain 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -383,10 +377,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
 
      final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser);
-      // ignore deleted docs if range doesn't contain 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -438,10 +431,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
 
      final double[] values = FieldCache.DEFAULT.getDoubles(context.reader, field, (FieldCache.DoubleParser) parser);
-      // ignore deleted docs if range doesn't contain 0
-      return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
+      return new FieldCacheDocIdSet(context.reader) {
        @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -505,20 +497,25 @@
 
  /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
  public FieldCache.Parser getParser() { return parser; }
 
-  static abstract class FieldCacheDocIdSet extends DocIdSet {
+  static abstract class FieldCacheDocIdSet extends DocIdSet implements Bits {
    private final IndexReader reader;
-    private final boolean canIgnoreDeletedDocs;
 
-    FieldCacheDocIdSet(IndexReader reader, boolean canIgnoreDeletedDocs) {
+    FieldCacheDocIdSet(IndexReader reader) {
      this.reader = reader;
-      this.canIgnoreDeletedDocs = canIgnoreDeletedDocs;
+      setContainsOnlyLiveDocs(false);
    }
 
    /**
     * this method checks, if a doc is a hit, should throw AIOBE, when position
     * invalid
     */
-    abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
+    @Override
+    public abstract boolean get(int doc);
+
+    @Override
+    public int length() {
+      return reader.maxDoc();
+    }
 
    /**
     * this DocIdSet is always cacheable (does not go back
@@ -531,79 +528,39 @@
 
    @Override
    public DocIdSetIterator iterator() throws IOException {
-
-      final Bits liveDocs = canIgnoreDeletedDocs ? null : reader.getLiveDocs();
-
-      if (liveDocs == null) {
-        // Specialization optimization disregard deletions
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() {
-            try {
-              do {
-                doc++;
-              } while (!matchDoc(doc));
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-
-          @Override
-          public int advance(int target) {
-            try {
-              doc = target;
-              while (!matchDoc(doc)) {
-                doc++;
-              }
-              return doc;
-            } catch (ArrayIndexOutOfBoundsException e) {
-              return doc = NO_MORE_DOCS;
-            }
-          }
-        };
-      } else {
-        // Must consult deletions
-
-        final int maxDoc = reader.maxDoc();
-
-        // a DocIdSetIterator generating docIds by
-        // incrementing a variable & checking liveDocs
-
-        return new DocIdSetIterator() {
-          private int doc = -1;
-          @Override
-          public int docID() {
-            return doc;
-          }
-
-          @Override
-          public int nextDoc() {
+      // This disregards deletions
+      return new DocIdSetIterator() {
+        private int doc = -1;
+        @Override
+        public int docID() {
+          return doc;
+        }
+
+        @Override
+        public int nextDoc() {
+          try {
            do {
              doc++;
-              if (doc >= maxDoc) {
-                return doc = NO_MORE_DOCS;
-              }
-            } while (!liveDocs.get(doc) || !matchDoc(doc));
+            } while (!get(doc));
            return doc;
+          } catch (ArrayIndexOutOfBoundsException e) {
+            return doc = NO_MORE_DOCS;
          }
-
-          @Override
-          public int advance(int target) {
-            for(doc=target;doc<maxDoc;doc++) {
-              if (liveDocs.get(doc) && matchDoc(doc)) {
-                return doc;
-              }
-            }
-            return doc = NO_MORE_DOCS;
-          }
-        };
-      }
+
+        @Override
+        public int advance(int target) {
+          try {
+            doc = target;
+            while (!get(doc)) {
+              doc++;
+            }
+            return doc;
+          } catch (ArrayIndexOutOfBoundsException e) {
+            return doc = NO_MORE_DOCS;
+          }
+        }
+      };
    }
  }
Index: src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/IndexSearcher.java (revision 1179349)
+++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy)
@@ ... @@
   private void searchWithFilter(AtomicReaderContext context, Weight weight,
       final Filter filter, final Collector collector) throws IOException {
 
     assert filter != null;
-
-    Scorer scorer = weight.scorer(context, true, false, context.reader.getLiveDocs());
-    if (scorer == null) {
-      return;
-    }
-
-    int docID = scorer.docID();
-    assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
 
     // CHECKME: use ConjunctionScorer here?
     DocIdSet filterDocIdSet = filter.getDocIdSet(context);
     if (filterDocIdSet == null) {
       // this means the filter does not accept any documents.
       return;
     }
 
     DocIdSetIterator filterIter = filterDocIdSet.iterator();
     if (filterIter == null) {
       // this means the filter does not accept any documents.
       return;
     }
-    int filterDoc = filterIter.nextDoc();
-    int scorerDoc = scorer.advance(filterDoc);
-
-    collector.setScorer(scorer);
-    while (true) {
-      if (scorerDoc == filterDoc) {
-        // Check if scorer has exhausted, only before collecting.
-        if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
-        }
-        collector.collect(scorerDoc);
-        filterDoc = filterIter.nextDoc();
-        scorerDoc = scorer.advance(filterDoc);
-      } else if (scorerDoc > filterDoc) {
-        filterDoc = filterIter.advance(scorerDoc);
+
+    boolean filterContainsLiveDocs = filterDocIdSet.containsOnlyLiveDocs();
+    // if our filter already filters out deleted docs, then we don't need to
+    // worry about the reader's liveDocs set.
+    Bits acceptDocs = filterContainsLiveDocs ? null : context.reader.getLiveDocs();
+
+    int firstFilterDoc = filterIter.nextDoc();
+
+    boolean useRandomAccess = false;
+
+    if (filterDocIdSet instanceof Bits && firstFilterDoc < filterRandomAccessThreshold) {
+      useRandomAccess = true;
+      Bits randomAccessBits = (Bits) filterDocIdSet;
+      // Filter by random access: we push the filter all the way down to the
+      // atomic scorers, so the bits are applied just like deleted docs
+      if (acceptDocs != null) {
+        // TODO: we could swap order of these two if we knew
+        // which was more restrictive?
+        acceptDocs = new AndBits(randomAccessBits, acceptDocs);
       } else {
-        scorerDoc = scorer.advance(filterDoc);
+        acceptDocs = randomAccessBits;
       }
     }
+
+    // if we are using random access, we collect normally; otherwise we will
+    // advance() this scorer, so we set inorder=true/toplevel=false
+    Scorer scorer = useRandomAccess
+        ? weight.scorer(context, !collector.acceptsDocsOutOfOrder(), true, acceptDocs)
+        : weight.scorer(context, true, false, acceptDocs);
+    if (scorer == null) {
+      return;
+    }
+
+    if (useRandomAccess) {
+      collector.setNextReader(context);
+      scorer.score(collector);
+    } else {
+      int docID = scorer.docID();
+      assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
+      int filterDoc = firstFilterDoc;
+      int scorerDoc = scorer.advance(filterDoc);
+      collector.setScorer(scorer);
+      while (true) {
+        if (scorerDoc == filterDoc) {
+          // Check if scorer has exhausted, only before collecting.
+          if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          collector.collect(scorerDoc);
+          filterDoc = filterIter.nextDoc();
+          scorerDoc = scorer.advance(filterDoc);
+        } else if (scorerDoc > filterDoc) {
+          filterDoc = filterIter.advance(scorerDoc);
+        } else {
+          scorerDoc = scorer.advance(filterDoc);
+        }
+      }
+    }
   }
 
+  private int filterRandomAccessThreshold = 100;
+
+  /**
+   * Expert:
+   *
+   * @return Threshold used by the heuristic that determines whether a Filter is dense or sparse.
+   * @see #setFilterRandomAccessThreshold(int)
+   * @lucene.experimental
+   */
+  public int getFilterRandomAccessThreshold() {
+    return filterRandomAccessThreshold;
+  }
+
+  /**
+   * Expert: Sets the threshold used by the heuristic that determines whether a
+   * Filter is dense or sparse (and therefore whether it should be applied via
+   * random access). If the filter's first matching document falls beneath this
+   * threshold, the filter is assumed to be dense.
+   *
+   * @param value Threshold to be used in this IndexSearcher
+   * @lucene.experimental
+   */
+  public void setFilterRandomAccessThreshold(int value) {
+    filterRandomAccessThreshold = value;
+  }
+
   /** Expert: called to re-write queries into primitive queries.
    * @throws BooleanQuery.TooManyClauses
    */
Index: src/java/org/apache/lucene/util/AndBits.java
===================================================================
--- src/java/org/apache/lucene/util/AndBits.java (revision 0)
+++ src/java/org/apache/lucene/util/AndBits.java (revision 0)
@@ -0,0 +1,42 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A Bits implementation that is the logical AND of two underlying Bits.
+ * @lucene.experimental
+ */
+public final class AndBits implements Bits {
+
+  private final Bits bits1;
+  private final Bits bits2;
+
+  public AndBits(Bits bits1, Bits bits2) {
+    this.bits1 = bits1;
+    this.bits2 = bits2;
+  }
+
+  @Override
+  public boolean get(int index) {
+    return bits1.get(index) && bits2.get(index);
+  }
+
+  @Override
+  public int length() {
+    return Math.min(bits1.length(), bits2.length());
+  }
+}

Property changes on: src\java\org\apache\lucene\util\AndBits.java
___________________________________________________________________
Added: svn:keywords
   + Date Author Id Revision HeadURL
Added: svn:eol-style
   + native

Index: src/test-framework/org/apache/lucene/util/LuceneTestCase.java
===================================================================
--- src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1179349)
+++ src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy)
@@ -1302,6 +1302,7 @@
       }
       IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext());
       ret.setSimilarityProvider(similarityProvider);
+      ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200));
       return ret;
     } else {
       int threads = 0;
@@ -1326,6 +1327,7 @@
         }
       };
       ret.setSimilarityProvider(similarityProvider);
+      ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200));
       return ret;
     }
   }
Index: src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
===================================================================
--- src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (revision 1179349)
+++ src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (working copy)
@@ -158,9 +158,9 @@
       return DocIdSet.EMPTY_DOCIDSET;
     }
 
-    return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) {
+    return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader) {
       @Override
-      boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+      public boolean get(int doc) throws ArrayIndexOutOfBoundsException {
        return termSet.get(fcsi.getOrd(doc));
      }
    };
Index: src/test/org/apache/lucene/search/TestCachingSpanFilter.java
===================================================================
--- src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1179349)
+++ src/test/org/apache/lucene/search/TestCachingSpanFilter.java (working copy)
@@ -87,13 +87,15 @@
     searcher.close();
     searcher = newSearcher(reader, false);
 
+    // no hit because IndexSearcher ANDs in the live docs:
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
 
+    // no hit because ConstantScoreQuery realizes the wrapped filter includes
+    // non-live docs and ANDs the live docs on the fly:
     docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+    assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits);
 
-    // force cache to regenerate:
     filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);
Index: src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
===================================================================
--- src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 1179349)
+++ src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy)
@@ -192,6 +192,8 @@
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
+
+    // ignore deletes
     ConstantScoreQuery constantScore = new ConstantScoreQuery(filter);
     docs = searcher.search(constantScore, 1);
     assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
@@ -204,13 +206,15 @@
     searcher.close();
     searcher = newSearcher(reader, false);
 
+    // no hit because IndexSearcher ANDs in the live docs:
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
 
+    // no hit because ConstantScoreQuery realizes the wrapped filter includes
+    // non-live docs and ANDs the live docs on the fly:
     docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+    assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits);
 
-    // force cache to regenerate:
     filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);
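
The flipped assertions above capture the contract this patch introduces: a DocIdSet cached under DeletesMode.RECACHE is marked with setContainsOnlyLiveDocs(true), so consumers such as ConstantScoreQuery and IndexSearcher know deletions are already folded in and can skip ANDing in the reader's liveDocs. As a rough sketch of how a user-supplied Filter could opt into the same contract (the class below is hypothetical and not part of this patch; it relies only on the new DocIdSet.setContainsOnlyLiveDocs method and existing OpenBitSet/Filter APIs):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.OpenBitSet;

    // Hypothetical match-all filter that folds deletions into the set it
    // returns, then marks the set so consumers may skip their own liveDocs AND:
    public class LiveDocsOnlyMatchAllFilter extends Filter {
      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
        final int maxDoc = context.reader.maxDoc();
        final Bits liveDocs = context.reader.getLiveDocs();
        final OpenBitSet bits = new OpenBitSet(maxDoc);
        for (int doc = 0; doc < maxDoc; doc++) {
          if (liveDocs == null || liveDocs.get(doc)) {
            bits.set(doc); // match every live doc
          }
        }
        // deletions are pre-applied, so advertise that fact:
        bits.setContainsOnlyLiveDocs(true);
        return bits;
      }
    }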
Index: src/test/org/apache/lucene/search/TestConstantScoreQuery.java
===================================================================
--- src/test/org/apache/lucene/search/TestConstantScoreQuery.java (revision 1179349)
+++ src/test/org/apache/lucene/search/TestConstantScoreQuery.java (working copy)
@@ -130,5 +130,32 @@
       if (directory != null) directory.close();
     }
   }
+
+  public void testConstantScoreQueryAndFilter() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, d);
+    Document doc = new Document();
+    doc.add(newField("field", "a", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "b", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+
+    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))), CachingWrapperFilter.DeletesMode.RECACHE);
+    Query query = new ConstantScoreQuery(filterB);
+
+    IndexSearcher s = new IndexSearcher(r);
+    assertEquals(1, s.search(query, filterB, 1).totalHits); // Query field:b, Filter field:b
+
+    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), CachingWrapperFilter.DeletesMode.RECACHE);
+    query = new ConstantScoreQuery(filterA);
+
+    assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:a, Filter field:b
+
+    r.close();
+    d.close();
+  }
 }
Index: src/test/org/apache/lucene/search/TestFilteredSearch.java
===================================================================
--- src/test/org/apache/lucene/search/TestFilteredSearch.java (revision 1179349)
+++ src/test/org/apache/lucene/search/TestFilteredSearch.java (working copy)
@@ -23,9 +23,12 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
@@ -116,4 +119,56 @@
     }
   }
 
+  /** when a filter is executed via random access, make sure
+   *  we get BucketScorer */
+  public void testBS1WithRandomAccessFilter() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    Document d = new Document();
+    d.add(newField("foo", "bar", TextField.TYPE_STORED));
+    iw.addDocument(d);
+    d = new Document();
+    d.add(newField("foo", "baz", TextField.TYPE_STORED));
+    iw.addDocument(d);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher is = newSearcher(ir);
+
+    // force the filter to be executed as bits
+    is.setFilterRandomAccessThreshold(Integer.MAX_VALUE);
+
+    // for the combined BQ, the scorer should always be BooleanScorer's
+    // BucketScorer, because our scorer supports out-of-order collection!
+    final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
+    bq.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
+    is.search(bq, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("foo", "bar")))),
+        new Collector() {
+
+          @Override
+          public void setScorer(Scorer scorer) throws IOException {
+            assertEquals(bucketScorerClass, scorer.getClass().getName());
+          }
+
+          @Override
+          public void collect(int doc) throws IOException {
+          }
+
+          @Override
+          public void setNextReader(AtomicReaderContext context) throws IOException {
+          }
+
+          @Override
+          public boolean acceptsDocsOutOfOrder() {
+            return true;
+          }
+        }
+    );
+    is.close();
+    ir.close();
+    dir.close();
+  }
 }
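
Taken together, the IndexSearcher changes apply a "dense" filter (one whose DocIdSet implements Bits and whose first hit falls below the threshold) by random access, pushing the bits down to the atomic scorers (ANDed with the liveDocs via AndBits when needed) instead of leapfrogging with advance(). A rough usage sketch of the new knob (the reader, field name, term, and threshold value are illustrative, not from the patch):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.CachingWrapperFilter;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.QueryWrapperFilter;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.search.TopDocs;

    public class DenseFilterExample {
      static TopDocs searchWithDenseFilter(IndexReader reader) throws IOException {
        IndexSearcher searcher = new IndexSearcher(reader);
        // treat any filter whose first hit is below doc 1000 as dense, so it
        // is applied via random-access bits rather than advance():
        searcher.setFilterRandomAccessThreshold(1000);
        Filter filter = new CachingWrapperFilter(
            new QueryWrapperFilter(new TermQuery(new Term("field", "b"))),
            CachingWrapperFilter.DeletesMode.RECACHE);
        try {
          return searcher.search(new MatchAllDocsQuery(), filter, 10);
        } finally {
          searcher.close();
        }
      }
    }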