Index: lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java	(revision 1179309)
+++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java	(working copy)
@@ -130,5 +130,32 @@
       if (directory != null) directory.close();
     }
   }
+
+  public void testConstantScoreQueryAndFilter() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, d);
+    Document doc = new Document();
+    doc.add(newField("field", "a", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "b", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+
+    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
+    Query query = new ConstantScoreQuery(filterB);
+
+    IndexSearcher s = new IndexSearcher(r);
+    assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b
+
+    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
+    query = new ConstantScoreQuery(filterA);
+
+    assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:a, Filter field:b
+
+    r.close();
+    d.close();
+  }
 }
Index: lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java	(revision 1179309)
+++ lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java	(working copy)
@@ -23,9 +23,12 @@
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
@@ -116,4 +119,56 @@
     }
   }
 
+  /** when a filter is executed with random access, make sure
+   *  we get the BucketScorer */
+  public void testBS1WithRandomAccessFilter() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random, dir);
+    Document d = new Document();
+    d.add(newField("foo", "bar", TextField.TYPE_STORED));
+    iw.addDocument(d);
+    d = new Document();
+    d.add(newField("foo", "baz", TextField.TYPE_STORED));
+    iw.addDocument(d);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher is = newSearcher(ir);
+
+    // force the filter to be executed as bits
+    is.setFilterRandomAccessThreshold(Integer.MAX_VALUE);
+
+    // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer,
+    // because our collector accepts docs out of order!
+    final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
+    BooleanQuery bq = new BooleanQuery();
+    bq.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
+    bq.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
+    is.search(bq, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("foo", "bar")))),
+        new Collector() {
+          @Override
+          public void setScorer(Scorer scorer) throws IOException {
+            assertEquals(bucketScorerClass, scorer.getClass().getName());
+          }
+
+          @Override
+          public void collect(int doc) throws IOException {
+          }
+
+          @Override
+          public void setNextReader(AtomicReaderContext context) throws IOException {
+          }
+
+          @Override
+          public boolean acceptsDocsOutOfOrder() {
+            return true;
+          }
+        });
+    is.close();
+    ir.close();
+    dir.close();
+  }
 }
Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java	(working copy)
@@ -128,11 +128,11 @@
 
     @Override
     public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
-        boolean topScorer, Bits acceptDocs) throws IOException {
+        boolean topScorer, final Bits acceptDocs) throws IOException {
       final DocIdSetIterator disi;
       if (filter != null) {
         assert query == null;
-        final DocIdSet dis = filter.getDocIdSet(context);
+        final DocIdSet dis = filter.getDocIdSet(context, acceptDocs);
         if (dis == null) {
           return null;
         }
Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java	(working copy)
@@ -105,7 +105,7 @@
    * results.
    */
   @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
     final IndexReader reader = context.reader;
     final Fields fields = reader.fields();
     if (fields == null) {
@@ -125,13 +125,12 @@
       // fill into a FixedBitSet
       final FixedBitSet bitSet = new FixedBitSet(context.reader.maxDoc());
       int termCount = 0;
-      final Bits liveDocs = reader.getLiveDocs();
       DocsEnum docsEnum = null;
       do {
         termCount++;
         // System.out.println("  iter termCount=" + termCount + " term=" +
         // enumerator.term().toBytesString());
-        docsEnum = termsEnum.docs(liveDocs, docsEnum);
+        docsEnum = termsEnum.docs(acceptDocs, docsEnum);
         final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
         while (true) {
           final int count = docsEnum.read();
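The ConstantScoreQuery change above is the consumer side of the new contract: the Weight's acceptDocs are handed straight to Filter.getDocIdSet instead of the filter pulling getLiveDocs() itself. A usage sketch (illustrative only, mirroring the new test; searcher and index setup assumed):

    // a constant-score view of a filter; IndexSearcher threads liveDocs
    // through as acceptDocs, so deleted docs never match
    Filter filter = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("field", "b"))));
    Query query = new ConstantScoreQuery(filter);
    TopDocs hits = searcher.search(query, filter, 10);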
Index: lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java	(working copy)
@@ -19,8 +19,11 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 
 /**
  * Wraps another SpanFilter's result and caches it.  The purpose is to allow
@@ -40,61 +43,95 @@
    * @param filter Filter to cache results of
    */
   public CachingSpanFilter(SpanFilter filter) {
-    this(filter, CachingWrapperFilter.DeletesMode.RECACHE);
+    this.filter = filter;
+    this.cache = new CachingWrapperFilter.FilterCache<SpanFilterResult>();
   }
 
-  /**
-   * @param filter Filter to cache results of
-   * @param deletesMode See {@link CachingWrapperFilter.DeletesMode}
-   */
-  public CachingSpanFilter(SpanFilter filter, CachingWrapperFilter.DeletesMode deletesMode) {
-    this.filter = filter;
-    if (deletesMode == CachingWrapperFilter.DeletesMode.DYNAMIC) {
-      throw new IllegalArgumentException("DeletesMode.DYNAMIC is not supported");
-    }
-    this.cache = new CachingWrapperFilter.FilterCache<SpanFilterResult>(deletesMode) {
+  @Override
+  public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+    final SpanFilterResult result = getCachedResult(context);
+    final DocIdSet dis = result.getDocIdSet();
+    return (acceptDocs == null) ? dis : new FilteredDocIdSet(dis) {
       @Override
-      protected SpanFilterResult mergeLiveDocs(final Bits liveDocs, final SpanFilterResult value) {
-        throw new IllegalStateException("DeletesMode.DYNAMIC is not supported");
+      protected boolean match(int docid) {
+        return acceptDocs.get(docid);
       }
     };
   }
 
   @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
-    SpanFilterResult result = getCachedResult(context);
-    return result != null ? result.getDocIdSet() : null;
+  public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+    final SpanFilterResult result = getCachedResult(context);
+    if (acceptDocs == null) {
+      return result;
+    } else {
+      // nocommit: filter positions
+      List<SpanFilterResult.PositionInfo> allPositions = result.getPositions();
+      List<SpanFilterResult.PositionInfo> positions = new ArrayList<SpanFilterResult.PositionInfo>(allPositions.size() / 2 + 1);
+      for (SpanFilterResult.PositionInfo p : allPositions) {
+        if (acceptDocs.get(p.getDoc())) {
+          positions.add(p);
+        }
+      }
+      return new SpanFilterResult(new FilteredDocIdSet(result.getDocIdSet()) {
+        @Override
+        protected boolean match(int docid) {
+          return acceptDocs.get(docid);
+        }
+      }, positions);
+    }
   }
+
+  /** Provide the SpanFilterResult to be cached, using the result provided
+   *  by the wrapped Filter.
+   *  <p>This implementation returns the given result, if {@link DocIdSet#isCacheable}
+   *  returns true on its DocIdSet, else it copies the {@link DocIdSetIterator} into
+   *  a {@link FixedBitSet}.
+   */
+  protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException {
+    if (result == null || result.getDocIdSet() == null) {
+      // this is better than returning null, as the nonnull result can be cached
+      return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
+    } else if (result.getDocIdSet().isCacheable()) {
+      return result;
+    } else {
+      final DocIdSetIterator it = result.getDocIdSet().iterator();
+      // null is allowed to be returned by iterator(),
+      // in this case we wrap with the empty set,
+      // which is cacheable.
+      if (it == null) {
+        return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
+      } else {
+        final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
+        bits.or(it);
+        return new SpanFilterResult(bits, result.getPositions());
+      }
+    }
+  }
 
   // for testing
   int hitCount, missCount;
 
   private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException {
     final IndexReader reader = context.reader;
-    final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? reader.getLiveDocs() : coreKey;
+    final Object coreKey = reader.getCoreCacheKey();
 
-    SpanFilterResult result = cache.get(reader, coreKey, delCoreKey);
+    SpanFilterResult result = cache.get(reader, coreKey);
     if (result != null) {
       hitCount++;
       return result;
+    } else {
+      missCount++;
+      // cache miss: we use no acceptDocs here
+      // (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set)
+      result = spanFilterResultToCache(filter.bitSpans(context, null/**!!!*/), reader);
+      cache.put(coreKey, result);
     }
-
-    missCount++;
-    result = filter.bitSpans(context);
-
-    cache.put(coreKey, delCoreKey, result);
+    return result;
   }
 
-  @Override
-  public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException {
-    return getCachedResult(context);
-  }
-
   @Override
   public String toString() {
     return "CachingSpanFilter("+filter+")";
   }
Index: lucene/src/java/org/apache/lucene/search/Filter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/Filter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/Filter.java	(working copy)
@@ -21,7 +21,7 @@
 
 import org.apache.lucene.index.IndexReader; // javadocs
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.Bits;
 
 /**
  * Abstract base class for restricting which documents may
@@ -44,14 +44,16 @@
    * represent the whole underlying index i.e. if the index has more than
    * one segment the given reader only represents a single segment.
    * The provided context is always an atomic context, so you can call
-   * {@link IndexReader#fields()} or {@link IndexReader#getLiveDocs()}
+   * {@link IndexReader#fields()}
    * on the context's reader, for example.
+   *
+   * @param acceptDocs
+   *          Bits that represent the allowable docs to match (typically this
+   *          excludes deleted docs, but it may filter out other documents as well)
    *
    * @return a DocIdSet that provides the documents which should be permitted or
    *         prohibited in search results. NOTE: null can be returned if
    *         no documents will be accepted by this Filter.
-   * 
-   * @see DocIdBitSet
    */
-  public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException;
+  public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
 }
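To make the new Filter contract concrete, here is a minimal, hypothetical implementation (not part of the patch): a filter must now intersect its matches with the passed-in acceptDocs itself, since getLiveDocs() is no longer consulted implicitly:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.FixedBitSet;

    /** Toy filter matching all even docIDs; illustrates the acceptDocs contract. */
    public final class EvenDocsFilter extends Filter {
      @Override
      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
        for (int doc = 0; doc < bits.length(); doc += 2) {
          if (acceptDocs == null || acceptDocs.get(doc)) {
            bits.set(doc); // only docs allowed by acceptDocs enter the set
          }
        }
        return bits; // FixedBitSet returns itself from bits(), enabling random access
      }
    }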
Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(working copy)
@@ -73,7 +73,7 @@
 
   /** This method is implemented for each data type */
   @Override
-  public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException;
+  public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
 
   /**
   * Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all
@@ ... @@
   */
  public static FieldCacheRangeFilter<String> newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<String>(field, null, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, field);
        final BytesRef spare = new BytesRef();
        final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare);
@@ -122,9 +122,7 @@
        
        assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
        
-        // for this DocIdSet, we can ignore deleted docs
-        // because deleted docs have an order of 0 (null entry in StringIndex)
-        return new FieldCacheDocIdSet(context.reader, true) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          final boolean matchDoc(int doc) {
            final int docOrd = fcsi.getOrd(doc);
@@ -152,7 +150,7 @@
  public static FieldCacheRangeFilter<Byte> newByteRange(String field, FieldCache.ByteParser parser, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Byte>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final byte inclusiveLowerPoint, inclusiveUpperPoint;
        if (lowerVal != null) {
          final byte i = lowerVal.byteValue();
@@ -175,8 +173,7 @@
          return DocIdSet.EMPTY_DOCIDSET;
        
        final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser);
-        // we only respect deleted docs if the range contains 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -203,7 +200,7 @@
  public static FieldCacheRangeFilter<Short> newShortRange(String field, FieldCache.ShortParser parser, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Short>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final short inclusiveLowerPoint, inclusiveUpperPoint;
        if (lowerVal != null) {
          short i = lowerVal.shortValue();
@@ -226,8 +223,7 @@
          return DocIdSet.EMPTY_DOCIDSET;
        
        final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser);
-        // ignore deleted docs if range doesn't contain 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -254,7 +250,7 @@
  public static FieldCacheRangeFilter<Integer> newIntRange(String field, FieldCache.IntParser parser, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Integer>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final int inclusiveLowerPoint, inclusiveUpperPoint;
        if (lowerVal != null) {
          int i = lowerVal.intValue();
@@ -277,8 +273,7 @@
          return DocIdSet.EMPTY_DOCIDSET;
        
        final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser);
-        // ignore deleted docs if range doesn't contain 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -305,7 +300,7 @@
  public static FieldCacheRangeFilter<Long> newLongRange(String field, FieldCache.LongParser parser, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Long>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        final long inclusiveLowerPoint, inclusiveUpperPoint;
        if (lowerVal != null) {
          long i = lowerVal.longValue();
@@ -328,8 +323,7 @@
          return DocIdSet.EMPTY_DOCIDSET;
        
        final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser);
-        // ignore deleted docs if range doesn't contain 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -356,7 +350,7 @@
  public static FieldCacheRangeFilter<Float> newFloatRange(String field, FieldCache.FloatParser parser, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Float>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        // we transform the floating point numbers to sortable integers
        // using NumericUtils to easier find the next bigger/lower value
        final float inclusiveLowerPoint, inclusiveUpperPoint;
@@ -383,8 +377,7 @@
          return DocIdSet.EMPTY_DOCIDSET;
        
        final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser);
-        // ignore deleted docs if range doesn't contain 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -411,7 +404,7 @@
  public static FieldCacheRangeFilter<Double> newDoubleRange(String field, FieldCache.DoubleParser parser, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
    return new FieldCacheRangeFilter<Double>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
      @Override
-      public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+      public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        // we transform the floating point numbers to sortable integers
        // using NumericUtils to easier find the next bigger/lower value
        final double inclusiveLowerPoint, inclusiveUpperPoint;
@@ -439,7 +432,7 @@
        final double[] values = FieldCache.DEFAULT.getDoubles(context.reader, field, (FieldCache.DoubleParser) parser);
-        // ignore deleted docs if range doesn't contain 0
-        return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
+        return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) {
          @Override
          boolean matchDoc(int doc) {
            return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
@@ -506,12 +499,12 @@
  public FieldCache.Parser getParser() { return parser; }
  
  static abstract class FieldCacheDocIdSet extends DocIdSet {
-    private final IndexReader reader;
-    private final boolean canIgnoreDeletedDocs;
+    private final int maxDoc;
+    private final Bits acceptDocs;
 
-    FieldCacheDocIdSet(IndexReader reader, boolean canIgnoreDeletedDocs) {
-      this.reader = reader;
-      this.canIgnoreDeletedDocs = canIgnoreDeletedDocs;
+    FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) {
+      this.maxDoc = maxDoc;
+      this.acceptDocs = acceptDocs;
    }
 
    /**
@@ -530,11 +523,29 @@
    }
 
    @Override
-    public DocIdSetIterator iterator() throws IOException {
+    public Bits bits() {
+      return (acceptDocs == null) ? new Bits() {
+        public boolean get(int docid) {
+          return FieldCacheDocIdSet.this.matchDoc(docid);
+        }
 
-      final Bits liveDocs = canIgnoreDeletedDocs ? null : reader.getLiveDocs();
+        public int length() {
+          return FieldCacheDocIdSet.this.maxDoc;
+        }
+      } : new Bits() {
+        public boolean get(int docid) {
+          return acceptDocs.get(docid) && FieldCacheDocIdSet.this.matchDoc(docid);
+        }
 
-      if (liveDocs == null) {
+        public int length() {
+          return FieldCacheDocIdSet.this.maxDoc;
+        }
+      };
+    }
+
+    @Override
+    public DocIdSetIterator iterator() throws IOException {
+      if (acceptDocs == null) {
        // Specialization optimization disregard deletions
        return new DocIdSetIterator() {
          private int doc = -1;
@@ -569,12 +580,10 @@
          }
        };
      } else {
-        // Must consult deletions
+        // Must consult acceptDocs
 
-        final int maxDoc = reader.maxDoc();
-
        // a DocIdSetIterator generating docIds by
-        // incrementing a variable & checking liveDocs
+        // incrementing a variable & checking acceptDocs
        return new DocIdSetIterator() {
          private int doc = -1;
          @Override
@@ -589,14 +598,14 @@
              if (doc >= maxDoc) {
                return doc = NO_MORE_DOCS;
              }
-            } while (!liveDocs.get(doc) || !matchDoc(doc));
+            } while (!acceptDocs.get(doc) || !matchDoc(doc));
            return doc;
          }
 
          @Override
          public int advance(int target) {
            for(doc=target; doc<maxDoc; doc++) {
-              if (liveDocs.get(doc) && matchDoc(doc)) {
+              if (acceptDocs.get(doc) && matchDoc(doc)) {
                return doc;
              }
            }
Index: lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java	(working copy)
@@ ... @@
  @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
-    SpanFilterResult result = bitSpans(context);
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+    SpanFilterResult result = bitSpans(context, acceptDocs);
    return result.getDocIdSet();
  }
 
  @Override
-  public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException {
+  public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
    Spans spans = query.getSpans(context);
    List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
    int currentDoc = -1;
    SpanFilterResult.PositionInfo currentInfo = null;
Index: lucene/src/java/org/apache/lucene/search/SpanFilterResult.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SpanFilterResult.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/SpanFilterResult.java	(working copy)
@@ -16,7 +16,7 @@
 */
 
 import java.util.ArrayList;
-
+import java.util.Collections;
 import java.util.List;
 
 
@@ -30,6 +30,9 @@
  private DocIdSet docIdSet;
  private List<PositionInfo> positions;//Spans spans;
 
+  public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT =
+    new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.<PositionInfo>emptyList());
+
  /**
  *
  * @param docIdSet The DocIdSet for the Filter
Index: lucene/src/java/org/apache/lucene/search/SpanFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SpanFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/SpanFilter.java	(working copy)
@@ -16,6 +16,7 @@
 */
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.util.Bits;
 
 import java.io.IOException;
 
@@ -34,5 +35,5 @@
  * @return A {@link SpanFilterResult}
  * @throws java.io.IOException if there was an issue accessing the necessary information
  * */
-  public abstract SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException;
+  public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
}
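The Filter/SpanFilter changes above all feed the same consumption pattern: a DocIdSet may now offer random access through bits(), and callers fall back to the iterator when it returns null. A hedged sketch of that pattern (the helper name consume is made up):

    // prefer random access when available, else iterate; both may signal emptiness via null
    static void consume(DocIdSet set, int maxDoc) throws IOException {
      final Bits randomAccess = set.bits(); // null: no random access supported
      if (randomAccess != null) {
        for (int doc = 0; doc < maxDoc; doc++) {
          if (randomAccess.get(doc)) { /* handle doc */ }
        }
      } else {
        final DocIdSetIterator it = set.iterator(); // may also be null for an empty set
        if (it == null) return;
        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
          /* handle doc */
        }
      }
    }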
Index: lucene/src/java/org/apache/lucene/search/FilteredQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FilteredQuery.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/FilteredQuery.java	(working copy)
@@ -85,7 +85,7 @@
          inner.addDetail(preBoost);
        }
        Filter f = FilteredQuery.this.filter;
-        DocIdSet docIdSet = f.getDocIdSet(ir);
+        DocIdSet docIdSet = f.getDocIdSet(ir, null);
        DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSet.EMPTY_DOCIDSET.iterator() : docIdSet.iterator();
        if (docIdSetIterator == null) {
          docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator();
@@ -114,7 +114,8 @@
        if (scorer == null) {
          return null;
        }
-        DocIdSet docIdSet = filter.getDocIdSet(context);
+        // TODO: replicate code from IndexSearcher here for direct access to the random access Bits
+        DocIdSet docIdSet = filter.getDocIdSet(context, context.reader.getLiveDocs());
        if (docIdSet == null) {
          return null;
        }
Index: lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java	(working copy)
@@ -23,6 +23,7 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -116,68 +117,24 @@
  }
 
  @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
-    return new FieldCacheTermsFilterDocIdSet(getFieldCache().getTermsIndex(context.reader, field));
-  }
-
-  protected class FieldCacheTermsFilterDocIdSet extends DocIdSet {
-    private FieldCache.DocTermsIndex fcsi;
-
-    private FixedBitSet bits;
-
-    public FieldCacheTermsFilterDocIdSet(FieldCache.DocTermsIndex fcsi) {
-      this.fcsi = fcsi;
-      bits = new FixedBitSet(this.fcsi.numOrd());
-      final BytesRef spare = new BytesRef();
-      for (int i=0;i<terms.length;i++) {
-        int termNumber = this.fcsi.binarySearchLookup(terms[i], spare);
-        if (termNumber > 0) {
-          bits.set(termNumber);
-        }
+  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+    final FieldCache.DocTermsIndex fcsi = getFieldCache().getTermsIndex(context.reader, field);
+    final FixedBitSet bits = new FixedBitSet(fcsi.numOrd());
+    final BytesRef spare = new BytesRef();
+    for (int i=0;i<terms.length;i++) {
+      int termNumber = fcsi.binarySearchLookup(terms[i], spare);
+      if (termNumber > 0) {
+        bits.set(termNumber);
      }
    }
-
-    @Override
-    public DocIdSetIterator iterator() {
-      return new FieldCacheTermsFilterDocIdSetIterator();
-    }
-
-    /** This DocIdSet implementation is cacheable. */
-    @Override
-    public boolean isCacheable() {
-      return true;
-    }
-
-    protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator {
-      private int doc = -1;
-
+    final int maxDoc = context.reader.maxDoc();
+    return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) {
      @Override
-      public int docID() {
-        return doc;
+      boolean matchDoc(int doc) {
+        if (doc >= maxDoc)
+          throw new ArrayIndexOutOfBoundsException();
+        return bits.get(fcsi.getOrd(doc));
      }
-
-      @Override
-      public int nextDoc() {
-        try {
-          while (!bits.get(fcsi.getOrd(++doc))) {}
-        } catch (ArrayIndexOutOfBoundsException e) {
-          doc = NO_MORE_DOCS;
-        }
-        return doc;
-      }
-
-      @Override
-      public int advance(int target) {
-        try {
-          doc = target;
-          while (!bits.get(fcsi.getOrd(doc))) {
-            doc++;
-          }
-        } catch (ArrayIndexOutOfBoundsException e) {
-          doc = NO_MORE_DOCS;
-        }
-        return doc;
-      }
-    }
+    };
  }
}
Index: lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java	(working copy)
@@ -67,7 +67,7 @@
 
  protected final FilterCache<DocIdSet> cache;
 
-  static abstract class FilterCache<T> {
+  static class FilterCache<T> {
 
    /**
     * A transient Filter cache (package private because of test)
@@ -76,97 +76,29 @@
    // after de-serialize
    transient Map<Object,T> cache;
 
-    private final DeletesMode deletesMode;
-
-    public FilterCache(DeletesMode deletesMode) {
-      this.deletesMode = deletesMode;
-    }
-
-    public synchronized T get(IndexReader reader, Object coreKey, Object delCoreKey) throws IOException {
-      T value;
+    public synchronized T get(IndexReader reader, Object coreKey) throws IOException {
 
      if (cache == null) {
        cache = new WeakHashMap<Object,T>();
      }
 
-      if (deletesMode == DeletesMode.IGNORE) {
-        // key on core
-        value = cache.get(coreKey);
-      } else if (deletesMode == DeletesMode.RECACHE) {
-        // key on deletes, if any, else core
-        value = cache.get(delCoreKey);
-      } else {
-
-        assert deletesMode == DeletesMode.DYNAMIC;
-
-        // first try for exact match
-        value = cache.get(delCoreKey);
-
-        if (value == null) {
-          // now for core match, but dynamically AND
-          // live docs
-          value = cache.get(coreKey);
-          if (value != null) {
-            final Bits liveDocs = reader.getLiveDocs();
-            if (liveDocs != null) {
-              value = mergeLiveDocs(liveDocs, value);
-            }
-          }
-        }
-      }
-
-      return value;
+      return cache.get(coreKey);
    }
 
-    protected abstract T mergeLiveDocs(Bits liveDocs, T value);
-
-    public synchronized void put(Object coreKey, Object delCoreKey, T value) {
-      if (deletesMode == DeletesMode.IGNORE) {
-        cache.put(coreKey, value);
-      } else if (deletesMode == DeletesMode.RECACHE) {
-        cache.put(delCoreKey, value);
-      } else {
-        cache.put(coreKey, value);
-        cache.put(delCoreKey, value);
-      }
+    public synchronized void put(Object coreKey, T value) {
+      cache.put(coreKey, value);
    }
  }
 
  /**
   * New deletes are ignored by default, which gives higher
-   * cache hit rate on reopened readers.  Most of the time
-   * this is safe, because the filter will be AND'd with a
-   * Query that fully enforces deletions.  If instead you
-   * need this filter to always enforce deletions, pass
-   * either {@link DeletesMode#RECACHE} or {@link
-   * DeletesMode#DYNAMIC}.
+   * cache hit rate on reopened readers.
   * @param filter Filter to cache results of
   */
  public CachingWrapperFilter(Filter filter) {
-    this(filter, DeletesMode.IGNORE);
-  }
-
-  /**
-   * Expert: by default, the cached filter will be shared
-   * across reopened segments that only had changes to their
-   * deletions.
-   *
-   * @param filter Filter to cache results of
-   * @param deletesMode See {@link DeletesMode}
-   */
-  public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) {
    this.filter = filter;
-    cache = new FilterCache<DocIdSet>(deletesMode) {
-      @Override
-      public DocIdSet mergeLiveDocs(final Bits liveDocs, final DocIdSet docIdSet) {
-        return new FilteredDocIdSet(docIdSet) {
-          @Override
-          protected boolean match(int docID) {
-            return liveDocs.get(docID);
-          }
-        };
-      }
-    };
+    cache = new FilterCache<DocIdSet>();
  }
 
  /** Provide the DocIdSet to be cached, using the DocIdSet provided
@@ -200,27 +132,28 @@
  int hitCount, missCount;
 
  @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    final IndexReader reader = context.reader;
    final Object coreKey = reader.getCoreCacheKey();
-    final Object delCoreKey = reader.hasDeletions() ? reader.getLiveDocs() : coreKey;
 
-    DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey);
+    DocIdSet docIdSet = cache.get(reader, coreKey);
    if (docIdSet != null) {
      hitCount++;
-      return docIdSet;
+    } else {
+      missCount++;
+      // cache miss: we use no acceptDocs here
+      // (this saves time on building DocIdSet, the acceptDocs will be applied on the cached set)
+      docIdSet = docIdSetToCache(filter.getDocIdSet(context, null/**!!!*/), reader);
+      cache.put(coreKey, docIdSet);
    }
-
-    missCount++;
-
-    // cache miss
-    docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
-
-    if (docIdSet != null) {
-      cache.put(coreKey, delCoreKey, docIdSet);
-    }
-
-    return docIdSet;
+    return (acceptDocs == null) ? docIdSet : new FilteredDocIdSet(docIdSet) {
+      @Override
+      protected boolean match(int docid) {
+        return acceptDocs.get(docid);
+      }
+    };
  }
 
  @Override
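With DeletesMode gone, the cache is keyed purely on the segment's core key, and per-call acceptDocs are applied on top of the cached set via FilteredDocIdSet. A behavior sketch (assuming the reconstruction above; reader and context setup not shown):

    CachingWrapperFilter cached = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("field", "a"))));
    DocIdSet a = cached.getDocIdSet(context, null); // miss: builds once, caches the raw set
    DocIdSet b = cached.getDocIdSet(context, context.reader.getLiveDocs());
    // hit: the same cached set, wrapped so that only live docs match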
Index: lucene/src/java/org/apache/lucene/search/DocIdSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/DocIdSet.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/DocIdSet.java	(working copy)
@@ -18,6 +18,7 @@
 */
 
 import java.io.IOException;
+import org.apache.lucene.util.Bits;
 
 /**
 * A DocIdSet contains a set of doc ids. Implementing classes must
@@ -46,6 +47,12 @@
    public boolean isCacheable() {
      return true;
    }
+
+    // we explicitly provide no random access, as this set is 100% sparse and the iterator exits faster
+    @Override
+    public Bits bits() throws IOException {
+      return null;
+    }
  };
 
  /** Provides a {@link DocIdSetIterator} to access the set.
@@ -54,6 +61,13 @@
  * are no docs that match. */
  public abstract DocIdSetIterator iterator() throws IOException;
 
+  /** Optionally provides a {@link Bits} interface for random access.
+   * Returns {@code null} if this DocIdSet does not support random access.
+   * The default implementation does not provide random access. */
+  public Bits bits() throws IOException {
+    return null;
+  }
+
  /**
  * This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet
  * should be cached without copying it into a BitSet. The default is to return
Index: lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java	(working copy)
@@ -20,6 +20,7 @@
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.util.Bits;
 
 /**
 * Constrains search results to only match those which also match a provided
@@ -47,7 +48,7 @@
  }
 
  @Override
-  public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException {
+  public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    // get a private context that is used to rewrite, createWeight and score eventually
    assert context.reader.getTopReaderContext().isAtomic;
    final AtomicReaderContext privateContext = (AtomicReaderContext) context.reader.getTopReaderContext();
@@ -55,7 +56,7 @@
    return new DocIdSet() {
      @Override
      public DocIdSetIterator iterator() throws IOException {
-        return weight.scorer(privateContext, true, false, privateContext.reader.getLiveDocs());
+        return weight.scorer(privateContext, true, false, acceptDocs);
      }
      @Override
      public boolean isCacheable() { return false; }
Index: lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java	(working copy)
@@ -18,6 +18,7 @@
 */
 
 import java.io.IOException;
+import org.apache.lucene.util.Bits;
 
 /**
 * Abstract decorator class for a DocIdSet implementation
@@ -54,14 +55,33 @@
  public boolean isCacheable() {
    return _innerSet.isCacheable();
  }
+
+  @Override
+  public Bits bits() throws IOException {
+    final Bits bits = _innerSet.bits();
+    return (bits == null) ? null : new Bits() {
+      public boolean get(int docid) {
+        try {
+          return bits.get(docid) && FilteredDocIdSet.this.match(docid);
+        } catch (IOException ioe) {
+          // nocommit: how to handle this here?
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      public int length() {
+        return bits.length();
+      }
+    };
+  }
 
  /**
  * Validation method to determine whether a docid should be in the result set.
  * @param docid docid to be tested
  * @return true if input docid should be in the result set, false otherwise.
  */
  protected abstract boolean match(int docid) throws IOException;
-  
+
  /**
  * Implementation of the contract to build a DocIdSetIterator.
  * @see DocIdSetIterator
Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/IndexSearcher.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java	(working copy)
@@ -41,6 +41,7 @@
 import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NIOFSDirectory;   // javadoc
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ThreadInterruptedException;
 
@@ -589,18 +590,9 @@
                               final Filter filter, final Collector collector)
      throws IOException {
    assert filter != null;
-
-    // we are gonna advance() this scorer, so we set inorder=true/toplevel=false
-    Scorer scorer = weight.scorer(context, true, false, context.reader.getLiveDocs());
-    if (scorer == null) {
-      return;
-    }
-    int docID = scorer.docID();
-    assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
 
-    // CHECKME: use ConjunctionScorer here?
-    DocIdSet filterDocIdSet = filter.getDocIdSet(context);
+    final Bits liveDocs = context.reader.getLiveDocs();
+    DocIdSet filterDocIdSet = filter.getDocIdSet(context, liveDocs);
    if (filterDocIdSet == null) {
      // this means the filter does not accept any documents.
      return;
@@ -611,27 +603,70 @@
      // this means the filter does not accept any documents.
      return;
    }
-    int filterDoc = filterIter.nextDoc();
-    int scorerDoc = scorer.advance(filterDoc);
-    
-    collector.setScorer(scorer);
-    while (true) {
-      if (scorerDoc == filterDoc) {
-        // Check if scorer has exhausted, only before collecting.
-        if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
+
+    final int firstFilterDoc = filterIter.nextDoc();
+
+    final Bits filterAcceptDocs = filterDocIdSet.bits();
+    final boolean useRandomAccess = (filterAcceptDocs != null && firstFilterDoc < filterRandomAccessThreshold);
+
+    // if we are using random access, we collect normally, otherwise,
+    // we are gonna advance() this scorer, so we set inorder=true/toplevel=false
+    if (useRandomAccess) {
+      final Scorer scorer = weight.scorer(context, !collector.acceptsDocsOutOfOrder(), true, filterAcceptDocs);
+      if (scorer == null) return;
+      scorer.score(collector);
+    } else {
+      final Scorer scorer = weight.scorer(context, true, false, null);
+      if (scorer == null) return;
+      int docID = scorer.docID();
+      assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
+      int filterDoc = firstFilterDoc;
+      int scorerDoc = scorer.advance(filterDoc);
+      collector.setScorer(scorer);
+      while (true) {
+        if (scorerDoc == filterDoc) {
+          // Check if scorer has exhausted, only before collecting.
+          if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          collector.collect(scorerDoc);
+          filterDoc = filterIter.nextDoc();
+          scorerDoc = scorer.advance(filterDoc);
+        } else if (scorerDoc > filterDoc) {
+          filterDoc = filterIter.advance(scorerDoc);
+        } else {
+          scorerDoc = scorer.advance(filterDoc);
        }
-        collector.collect(scorerDoc);
-        filterDoc = filterIter.nextDoc();
-        scorerDoc = scorer.advance(filterDoc);
-      } else if (scorerDoc > filterDoc) {
-        filterDoc = filterIter.advance(scorerDoc);
-      } else {
-        scorerDoc = scorer.advance(filterDoc);
      }
    }
  }
 
+  private int filterRandomAccessThreshold = 100;
+
+  /**
+   * Expert: Returns the threshold used by the heuristic that determines
+   * whether a Filter is dense or sparse.
+   *
+   * @see #setFilterRandomAccessThreshold(int)
+   * @lucene.experimental
+   */
+  public int getFilterRandomAccessThreshold() {
+    return filterRandomAccessThreshold;
+  }
+
+  /**
+   * Expert: Sets the threshold used by the heuristic that determines whether a
+   * Filter is dense or sparse (and therefore whether random access should be
+   * used or not). If the first document found in the Filter is below this
+   * threshold, the filter is assumed to be dense.
+   *
+   * @param value Threshold to be used in this IndexSearcher
+   * @lucene.experimental
+   */
+  public void setFilterRandomAccessThreshold(int value) {
+    filterRandomAccessThreshold = value;
+  }
+
  /** Expert: called to re-write queries into primitive queries.
  * @throws BooleanQuery.TooManyClauses
  */
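The heuristic added to IndexSearcher is deliberately cheap: only the filter's first matching docID is compared against the threshold. A usage sketch (threshold values are illustrative; 100 is the patch's default):

    IndexSearcher searcher = new IndexSearcher(reader);
    // always use bits() when the filter's DocIdSet provides it:
    searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE);
    // only a filter whose first match is doc 0 counts as dense:
    searcher.setFilterRandomAccessThreshold(1);
    // dense filter: the scorer drives collection via scorer.score(collector);
    // sparse filter: classic leapfrog between the filter iterator and the scorer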
Index: lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java	(working copy)
@@ -438,7 +438,7 @@
    for (QueryAndLimit ent : queriesIter) {
      Query query = ent.query;
      int limit = ent.limit;
-      final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext);
+      final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext, readerContext.reader.getLiveDocs());
      if (docs != null) {
        final DocIdSetIterator it = docs.iterator();
        if (it != null) {
@@ -448,11 +448,8 @@
              break;
 
            reader.deleteDocument(doc);
-            // TODO: we could/should change
-            // reader.deleteDocument to return boolean
-            // true if it did in fact delete, because here
-            // we could be deleting an already-deleted doc
-            // which makes this an upper bound:
+            // as we use getLiveDocs() to filter out already deleted documents,
+            // we only delete live documents, so the counting is right:
            delCount++;
          }
        }
Index: lucene/src/java/org/apache/lucene/util/DocIdBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/DocIdBitSet.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/util/DocIdBitSet.java	(working copy)
@@ -24,8 +24,8 @@
 
 /** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
-public class DocIdBitSet extends DocIdSet {
-  private BitSet bitSet;
+public class DocIdBitSet extends DocIdSet implements Bits {
+  private final BitSet bitSet;
 
  public DocIdBitSet(BitSet bitSet) {
    this.bitSet = bitSet;
@@ -36,6 +36,11 @@
    return new DocIdBitSetIterator(bitSet);
  }
 
+  @Override
+  public Bits bits() {
+    return this;
+  }
+
  /** This DocIdSet implementation is cacheable. */
  @Override
  public boolean isCacheable() {
@@ -46,9 +51,20 @@
  * Returns the underlying BitSet.
  */
  public BitSet getBitSet() {
-	return this.bitSet;
+    return this.bitSet;
  }
 
+  @Override
+  public boolean get(int index) {
+    return bitSet.get(index);
+  }
+
+  @Override
+  public int length() {
+    // the size may not be correct...
+    return bitSet.size();
+  }
+
  private static class DocIdBitSetIterator extends DocIdSetIterator {
    private int docId;
    private BitSet bitSet;
Index: lucene/src/java/org/apache/lucene/util/FixedBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/FixedBitSet.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/util/FixedBitSet.java	(working copy)
@@ -67,6 +67,11 @@
  }
 
  @Override
+  public Bits bits() {
+    return this;
+  }
+
+  @Override
  public int length() {
    return numBits;
  }
Index: lucene/src/java/org/apache/lucene/util/OpenBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/OpenBitSet.java	(revision 1179309)
+++ lucene/src/java/org/apache/lucene/util/OpenBitSet.java	(working copy)
@@ -119,6 +119,11 @@
    return new OpenBitSetIterator(bits, wlen);
  }
 
+  @Override
+  public Bits bits() {
+    return this;
+  }
+
  /** This DocIdSet implementation is cacheable. */
  @Override
  public boolean isCacheable() {
Index: lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java	(revision 1179309)
+++ lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java	(working copy)
@@ -22,6 +22,7 @@
 import junit.framework.Assert;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.util.Bits;
 
 /**
 * A unit test helper class to test when the filter is getting cached and when it is not.
@@ -42,10 +43,10 @@
  }
 
  @Override
-  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
+  public synchronized DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final int saveMissCount = missCount;
-    DocIdSet docIdSet = super.getDocIdSet(context);
+    DocIdSet docIdSet = super.getDocIdSet(context, acceptDocs);
 
    if (shouldHaveCache) {
      Assert.assertEquals("Cache should have data ", saveMissCount, missCount);
Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java	(revision 1179309)
+++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java	(working copy)
@@ -1302,6 +1302,7 @@
      }
      IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext());
      ret.setSimilarityProvider(similarityProvider);
+      ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200));
      return ret;
    } else {
      int threads = 0;
@@ -1326,6 +1327,7 @@
        }
      };
      ret.setSimilarityProvider(similarityProvider);
+      ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200));
      return ret;
    }
  }