Index: lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java =================================================================== --- lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java (revision 1180003) +++ lucene/contrib/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java (working copy) @@ -121,7 +121,8 @@ final int maxDoc = in.maxDoc(); final FixedBitSet bits = new FixedBitSet(maxDoc); - final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext()); + // ignore livedocs here, as we filter them later: + final DocIdSet docs = preserveFilter.getDocIdSet((AtomicReaderContext) in.getTopReaderContext(), null); if (docs != null) { final DocIdSetIterator it = docs.iterator(); if (it != null) { Index: lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java =================================================================== --- lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java (revision 1180003) +++ lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java (working copy) @@ -70,17 +70,16 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (processingMode == ProcessingMode.PM_FAST_INVALIDATION) { - return fastBits(context.reader); + return fastBits(context.reader, acceptDocs); } else { - return correctBits(context.reader); + return correctBits(context.reader, acceptDocs); } } - private FixedBitSet correctBits(IndexReader reader) throws IOException { + private FixedBitSet correctBits(IndexReader reader, Bits acceptDocs) throws IOException { FixedBitSet bits = new FixedBitSet(reader.maxDoc()); //assume all are INvalid - final Bits liveDocs = MultiFields.getLiveDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms == null) { @@ -94,7 +93,7 @@ if (currTerm == null) { break; } else { - docs = termsEnum.docs(liveDocs, docs); + docs = termsEnum.docs(acceptDocs, docs); int doc = docs.nextDoc(); if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) { @@ -116,10 +115,9 @@ return bits; } - private FixedBitSet fastBits(IndexReader reader) throws IOException { + private FixedBitSet fastBits(IndexReader reader, Bits acceptDocs) throws IOException { FixedBitSet bits = new FixedBitSet(reader.maxDoc()); bits.set(0, reader.maxDoc()); //assume all are valid - final Bits liveDocs = MultiFields.getLiveDocs(reader); Terms terms = reader.fields().terms(fieldName); if (terms == null) { @@ -135,7 +133,7 @@ } else { if (termsEnum.docFreq() > 1) { // unset potential duplicates - docs = termsEnum.docs(liveDocs, docs); + docs = termsEnum.docs(acceptDocs, docs); int doc = docs.nextDoc(); if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) { Index: lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java =================================================================== --- lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (revision 1180003) +++ lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.FilteredDocIdSet; import org.apache.lucene.spatial.DistanceUtils; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.spatial.tier.DistanceFilter; @@ -57,7 +58,7 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(context.reader, geoHashField); final BytesRef br = new BytesRef(); @@ -65,7 +66,7 @@ final int docBase = nextDocBase; nextDocBase += context.reader.maxDoc(); - return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) { + return new FilteredDocIdSet(startingFilter.getDocIdSet(context, acceptDocs)) { @Override public boolean match(int doc) { Index: lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java =================================================================== --- lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (revision 1180003) +++ lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (working copy) @@ -45,8 +45,7 @@ } @Override - public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException { - final Bits liveDocs = context.reader.getLiveDocs(); + public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { final List area = shape.getArea(); final int sz = area.size(); @@ -58,7 +57,7 @@ return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return context.reader.termDocsEnum(liveDocs, fieldName, bytesRef); + return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef); } @Override @@ -71,7 +70,7 @@ for (int i =0; i< sz; i++) { double boxId = area.get(i).doubleValue(); NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); - final DocsEnum docsEnum = context.reader.termDocsEnum(liveDocs, fieldName, bytesRef); + final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef); if (docsEnum == null) continue; // iterate through all documents // which have this boxId Index: lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java =================================================================== --- lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java (revision 1180003) +++ lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.util.Bits; import org.apache.lucene.spatial.DistanceUtils; @@ -60,7 +61,7 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final double[] latIndex = FieldCache.DEFAULT.getDoubles(context.reader, latField); final double[] lngIndex = FieldCache.DEFAULT.getDoubles(context.reader, lngField); @@ -68,7 +69,7 @@ final int docBase = nextDocBase; nextDocBase += context.reader.maxDoc(); - return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) { + return new FilteredDocIdSet(startingFilter.getDocIdSet(context, acceptDocs)) { @Override protected boolean match(int doc) { double x = latIndex[doc]; Index: lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java =================================================================== --- lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (revision 1180003) +++ lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (working copy) @@ -105,7 +105,7 @@ AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext()); for (int i = 0; i < leaves.length; i++) { - f.getDocIdSet(leaves[i]); + f.getDocIdSet(leaves[i], leaves[i].reader.getLiveDocs()); } r.close(); } Index: lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java =================================================================== --- lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (working copy) @@ -438,7 +438,7 @@ for (QueryAndLimit ent : queriesIter) { Query query = ent.query; int limit = ent.limit; - final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext); + final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext, readerContext.reader.getLiveDocs()); if (docs != null) { final DocIdSetIterator it = docs.iterator(); if (it != null) { @@ -448,11 +448,8 @@ break; reader.deleteDocument(doc); - // TODO: we could/should change - // reader.deleteDocument to return boolean - // true if it did in fact delete, because here - // we could be deleting an already-deleted doc - // which makes this an upper bound: + // as we use getLiveDocs() to filter out already deleted documents, + // we only delete live documents, so the counting is right: delCount++; } } Index: lucene/src/java/org/apache/lucene/search/BitsFilteredDocIdSet.java =================================================================== --- lucene/src/java/org/apache/lucene/search/BitsFilteredDocIdSet.java (revision 0) +++ lucene/src/java/org/apache/lucene/search/BitsFilteredDocIdSet.java (revision 0) @@ -0,0 +1,63 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.Bits; + +/** + * This implementation supplies a filtered DocIdSet, that excludes all + * docids which are not in a Bits instance. This is especially useful in + * {@link org.apache.lucene.search.Filter} to apply the {@code acceptDocs} + * passed to {@code getDocIdSet()} before returning the final DocIdSet. + * + * @see DocIdSet + * @see org.apache.lucene.search.Filter + */ + +public final class BitsFilteredDocIdSet extends FilteredDocIdSet { + + private final Bits acceptDocs; + + /** + * Convenience wrapper method: If {@code acceptDocs == null} it returns the original set without wrapping. + * @param set Underlying DocIdSet. If {@code null}, this method returns {@code null} + * @param acceptDocs Allowed docs, all docids not in this set will not be returned by this DocIdSet. + * If {@code null}, this method returns the original set without wrapping. + */ + public static DocIdSet wrap(DocIdSet set, Bits acceptDocs) { + return (set == null || acceptDocs == null) ? set : new BitsFilteredDocIdSet(set, acceptDocs); + } + + /** + * Constructor. + * @param innerSet Underlying DocIdSet + * @param acceptDocs Allowed docs, all docids not in this set will not be returned by this DocIdSet + */ + public BitsFilteredDocIdSet(DocIdSet innerSet, Bits acceptDocs) { + super(innerSet); + if (acceptDocs == null) + throw new NullPointerException("acceptDocs is null"); + this.acceptDocs = acceptDocs; + } + + @Override + protected boolean match(int docid) { + return acceptDocs.get(docid); + } + +} Property changes on: lucene\src\java\org\apache\lucene\search\BitsFilteredDocIdSet.java ___________________________________________________________________ Added: svn:keywords + Date Author Id Revision HeadURL Added: svn:eol-style + native Index: lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java (working copy) @@ -19,8 +19,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; /** * Wraps another SpanFilter's result and caches it. The purpose is to allow @@ -40,61 +43,84 @@ * @param filter Filter to cache results of */ public CachingSpanFilter(SpanFilter filter) { - this(filter, CachingWrapperFilter.DeletesMode.RECACHE); + this.filter = filter; + this.cache = new CachingWrapperFilter.FilterCache(); } - /** - * @param filter Filter to cache results of - * @param deletesMode See {@link CachingWrapperFilter.DeletesMode} - */ - public CachingSpanFilter(SpanFilter filter, CachingWrapperFilter.DeletesMode deletesMode) { - this.filter = filter; - if (deletesMode == CachingWrapperFilter.DeletesMode.DYNAMIC) { - throw new IllegalArgumentException("DeletesMode.DYNAMIC is not supported"); - } - this.cache = new CachingWrapperFilter.FilterCache(deletesMode) { - @Override - protected SpanFilterResult mergeLiveDocs(final Bits liveDocs, final SpanFilterResult value) { - throw new IllegalStateException("DeletesMode.DYNAMIC is not supported"); - } - }; + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { + final SpanFilterResult result = getCachedResult(context); + return BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs); } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - SpanFilterResult result = getCachedResult(context); - return result != null ? result.getDocIdSet() : null; + public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException { + final SpanFilterResult result = getCachedResult(context); + if (acceptDocs == null) { + return result; + } else { + // nocommit: filter positions + List allPositions = result.getPositions(); + List positions = new ArrayList(allPositions.size() / 2 + 1); + for (SpanFilterResult.PositionInfo p : allPositions) { + if (acceptDocs.get(p.getDoc())) { + positions.add(p); + } + } + return new SpanFilterResult(BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs), positions); + } } + + /** Provide the DocIdSet to be cached, using the DocIdSet provided + * by the wrapped Filter. + *

This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} + * returns true, else it copies the {@link DocIdSetIterator} into + * an {@link FixedBitSet}. + */ + protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException { + if (result == null || result.getDocIdSet() == null) { + // this is better than returning null, as the nonnull result can be cached + return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT; + } else if (result.getDocIdSet().isCacheable()) { + return result; + } else { + final DocIdSetIterator it = result.getDocIdSet().iterator(); + // null is allowed to be returned by iterator(), + // in this case we wrap with the empty set, + // which is cacheable. + if (it == null) { + return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT; + } else { + final FixedBitSet bits = new FixedBitSet(reader.maxDoc()); + bits.or(it); + return new SpanFilterResult(bits, result.getPositions()); + } + } + } // for testing int hitCount, missCount; private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException { final IndexReader reader = context.reader; - final Object coreKey = reader.getCoreCacheKey(); - final Object delCoreKey = reader.hasDeletions() ? reader.getLiveDocs() : coreKey; - SpanFilterResult result = cache.get(reader, coreKey, delCoreKey); + SpanFilterResult result = cache.get(reader, coreKey); if (result != null) { hitCount++; return result; + } else { + missCount++; + // cache miss: we use no acceptDocs here + // (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set) + result = spanFilterResultToCache(filter.bitSpans(context, null/**!!!*/), reader); + cache.put(coreKey, result); } - - missCount++; - result = filter.bitSpans(context); - - cache.put(coreKey, delCoreKey, result); + return result; } - @Override - public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { - return getCachedResult(context); - } - - @Override public String toString() { return "CachingSpanFilter("+filter+")"; } Index: lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy) @@ -67,7 +67,7 @@ protected final FilterCache cache; - static abstract class FilterCache { + static class FilterCache { /** * A transient Filter cache (package private because of test) @@ -76,97 +76,29 @@ // after de-serialize transient Map cache; - private final DeletesMode deletesMode; - - public FilterCache(DeletesMode deletesMode) { - this.deletesMode = deletesMode; - } - - public synchronized T get(IndexReader reader, Object coreKey, Object delCoreKey) throws IOException { + public synchronized T get(IndexReader reader, Object coreKey) throws IOException { T value; if (cache == null) { cache = new WeakHashMap(); } - if (deletesMode == DeletesMode.IGNORE) { - // key on core - value = cache.get(coreKey); - } else if (deletesMode == DeletesMode.RECACHE) { - // key on deletes, if any, else core - value = cache.get(delCoreKey); - } else { - - assert deletesMode == DeletesMode.DYNAMIC; - - // first try for exact match - value = cache.get(delCoreKey); - - if (value == null) { - // now for core match, but dynamically AND - // live docs - value = cache.get(coreKey); - if (value != null) { - final Bits liveDocs = reader.getLiveDocs(); - if (liveDocs != null) { - value = mergeLiveDocs(liveDocs, value); - } - } - } - } - - return value; + return cache.get(coreKey); } - protected abstract T mergeLiveDocs(Bits liveDocs, T value); - - public synchronized void put(Object coreKey, Object delCoreKey, T value) { - if (deletesMode == DeletesMode.IGNORE) { - cache.put(coreKey, value); - } else if (deletesMode == DeletesMode.RECACHE) { - cache.put(delCoreKey, value); - } else { - cache.put(coreKey, value); - cache.put(delCoreKey, value); - } + public synchronized void put(Object coreKey, T value) { + cache.put(coreKey, value); } } /** * New deletes are ignored by default, which gives higher - * cache hit rate on reopened readers. Most of the time - * this is safe, because the filter will be AND'd with a - * Query that fully enforces deletions. If instead you - * need this filter to always enforce deletions, pass - * either {@link DeletesMode#RECACHE} or {@link - * DeletesMode#DYNAMIC}. + * cache hit rate on reopened readers. * @param filter Filter to cache results of */ public CachingWrapperFilter(Filter filter) { - this(filter, DeletesMode.IGNORE); - } - - /** - * Expert: by default, the cached filter will be shared - * across reopened segments that only had changes to their - * deletions. - * - * @param filter Filter to cache results of - * @param deletesMode See {@link DeletesMode} - */ - public CachingWrapperFilter(Filter filter, DeletesMode deletesMode) { this.filter = filter; - cache = new FilterCache(deletesMode) { - @Override - public DocIdSet mergeLiveDocs(final Bits liveDocs, final DocIdSet docIdSet) { - return new FilteredDocIdSet(docIdSet) { - @Override - protected boolean match(int docID) { - return liveDocs.get(docID); - } - }; - } - }; + cache = new FilterCache(); } /** Provide the DocIdSet to be cached, using the DocIdSet provided @@ -200,27 +132,23 @@ int hitCount, missCount; @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { final IndexReader reader = context.reader; final Object coreKey = reader.getCoreCacheKey(); - final Object delCoreKey = reader.hasDeletions() ? reader.getLiveDocs() : coreKey; - DocIdSet docIdSet = cache.get(reader, coreKey, delCoreKey); + DocIdSet docIdSet = cache.get(reader, coreKey); if (docIdSet != null) { hitCount++; return docIdSet; + } else { + missCount++; + // cache miss: we use no acceptDocs here + // (this saves time on building DocIdSet, the acceptDocs will be applied on the cached set) + docIdSet = docIdSetToCache(filter.getDocIdSet(context, null/**!!!*/), reader); + cache.put(coreKey, docIdSet); } - - missCount++; - - // cache miss - docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader); - - if (docIdSet != null) { - cache.put(coreKey, delCoreKey, docIdSet); - } - return docIdSet; + return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs); } @Override Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java (working copy) @@ -128,11 +128,11 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, - boolean topScorer, Bits acceptDocs) throws IOException { + boolean topScorer, final Bits acceptDocs) throws IOException { final DocIdSetIterator disi; if (filter != null) { assert query == null; - final DocIdSet dis = filter.getDocIdSet(context); + final DocIdSet dis = filter.getDocIdSet(context, acceptDocs); if (dis == null) { return null; } Index: lucene/src/java/org/apache/lucene/search/DocIdSet.java =================================================================== --- lucene/src/java/org/apache/lucene/search/DocIdSet.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/DocIdSet.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import org.apache.lucene.util.Bits; /** * A DocIdSet contains a set of doc ids. Implementing classes must @@ -46,6 +47,12 @@ public boolean isCacheable() { return true; } + + // we explicitely provide no random access, as this filter is 100% sparse and iterator exits faster + @Override + public Bits bits() throws IOException { + return null; + } }; /** Provides a {@link DocIdSetIterator} to access the set. @@ -54,6 +61,13 @@ * are no docs that match. */ public abstract DocIdSetIterator iterator() throws IOException; + /** Optionally provides a {@link Bits} interface for random access. + * Returns {@code null}, if this DocIdSet does not support random access. + * The default implementation does not provide random access */ + public Bits bits() throws IOException { + return null; + } + /** * This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet * should be cached without copying it into a BitSet. The default is to return Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy) @@ -73,7 +73,7 @@ /** This method is implemented for each data type */ @Override - public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException; + public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException; /** * Creates a string range filter using {@link FieldCache#getTermsIndex}. This works with all @@ -83,7 +83,7 @@ public static FieldCacheRangeFilter newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, null, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, field); final BytesRef spare = new BytesRef(); final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare); @@ -122,9 +122,7 @@ assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0; - // for this DocIdSet, we can ignore deleted docs - // because deleted docs have an order of 0 (null entry in StringIndex) - return new FieldCacheDocIdSet(context.reader, true) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override final boolean matchDoc(int doc) { final int docOrd = fcsi.getOrd(doc); @@ -152,7 +150,7 @@ public static FieldCacheRangeFilter newByteRange(String field, FieldCache.ByteParser parser, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final byte inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { final byte i = lowerVal.byteValue(); @@ -175,8 +173,7 @@ return DocIdSet.EMPTY_DOCIDSET; final byte[] values = FieldCache.DEFAULT.getBytes(context.reader, field, (FieldCache.ByteParser) parser); - // we only respect deleted docs if the range contains 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -203,7 +200,7 @@ public static FieldCacheRangeFilter newShortRange(String field, FieldCache.ShortParser parser, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final short inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { short i = lowerVal.shortValue(); @@ -226,8 +223,7 @@ return DocIdSet.EMPTY_DOCIDSET; final short[] values = FieldCache.DEFAULT.getShorts(context.reader, field, (FieldCache.ShortParser) parser); - // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -254,7 +250,7 @@ public static FieldCacheRangeFilter newIntRange(String field, FieldCache.IntParser parser, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final int inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { int i = lowerVal.intValue(); @@ -277,8 +273,7 @@ return DocIdSet.EMPTY_DOCIDSET; final int[] values = FieldCache.DEFAULT.getInts(context.reader, field, (FieldCache.IntParser) parser); - // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -305,7 +300,7 @@ public static FieldCacheRangeFilter newLongRange(String field, FieldCache.LongParser parser, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final long inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { long i = lowerVal.longValue(); @@ -328,8 +323,7 @@ return DocIdSet.EMPTY_DOCIDSET; final long[] values = FieldCache.DEFAULT.getLongs(context.reader, field, (FieldCache.LongParser) parser); - // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -356,7 +350,7 @@ public static FieldCacheRangeFilter newFloatRange(String field, FieldCache.FloatParser parser, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final float inclusiveLowerPoint, inclusiveUpperPoint; @@ -383,8 +377,7 @@ return DocIdSet.EMPTY_DOCIDSET; final float[] values = FieldCache.DEFAULT.getFloats(context.reader, field, (FieldCache.FloatParser) parser); - // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -411,7 +404,7 @@ public static FieldCacheRangeFilter newDoubleRange(String field, FieldCache.DoubleParser parser, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) { return new FieldCacheRangeFilter(field, parser, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final double inclusiveLowerPoint, inclusiveUpperPoint; @@ -439,7 +432,7 @@ final double[] values = FieldCache.DEFAULT.getDoubles(context.reader, field, (FieldCache.DoubleParser) parser); // ignore deleted docs if range doesn't contain 0 - return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) { + return new FieldCacheDocIdSet(context.reader.maxDoc(), acceptDocs) { @Override boolean matchDoc(int doc) { return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint; @@ -506,12 +499,12 @@ public FieldCache.Parser getParser() { return parser; } static abstract class FieldCacheDocIdSet extends DocIdSet { - private final IndexReader reader; - private final boolean canIgnoreDeletedDocs; + private final int maxDoc; + private final Bits acceptDocs; - FieldCacheDocIdSet(IndexReader reader, boolean canIgnoreDeletedDocs) { - this.reader = reader; - this.canIgnoreDeletedDocs = canIgnoreDeletedDocs; + FieldCacheDocIdSet(int maxDoc, Bits acceptDocs) { + this.maxDoc = maxDoc; + this.acceptDocs = acceptDocs; } /** @@ -530,11 +523,29 @@ } @Override - public DocIdSetIterator iterator() throws IOException { + public Bits bits() { + return (acceptDocs == null) ? new Bits() { + public boolean get(int docid) { + return FieldCacheDocIdSet.this.matchDoc(docid); + } - final Bits liveDocs = canIgnoreDeletedDocs ? null : reader.getLiveDocs(); + public int length() { + return FieldCacheDocIdSet.this.maxDoc; + } + } : new Bits() { + public boolean get(int docid) { + return acceptDocs.get(docid) && FieldCacheDocIdSet.this.matchDoc(docid); + } - if (liveDocs == null) { + public int length() { + return FieldCacheDocIdSet.this.maxDoc; + } + }; + } + + @Override + public DocIdSetIterator iterator() throws IOException { + if (acceptDocs == null) { // Specialization optimization disregard deletions return new DocIdSetIterator() { private int doc = -1; @@ -569,12 +580,10 @@ } }; } else { - // Must consult deletions + // Must consult acceptDocs - final int maxDoc = reader.maxDoc(); - // a DocIdSetIterator generating docIds by - // incrementing a variable & checking liveDocs - + // incrementing a variable & checking acceptDocs - return new DocIdSetIterator() { private int doc = -1; @Override @@ -589,14 +598,14 @@ if (doc >= maxDoc) { return doc = NO_MORE_DOCS; } - } while (!liveDocs.get(doc) || !matchDoc(doc)); + } while (!acceptDocs.get(doc) || !matchDoc(doc)); return doc; } @Override public int advance(int target) { for(doc=target;doc 0) { - bits.set(termNumber); - } + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final FieldCache.DocTermsIndex fcsi = getFieldCache().getTermsIndex(context.reader, field); + final FixedBitSet bits = new FixedBitSet(fcsi.numOrd()); + final BytesRef spare = new BytesRef(); + for (int i=0;i 0) { + bits.set(termNumber); } } - - @Override - public DocIdSetIterator iterator() { - return new FieldCacheTermsFilterDocIdSetIterator(); - } - - /** This DocIdSet implementation is cacheable. */ - @Override - public boolean isCacheable() { - return true; - } - - protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator { - private int doc = -1; - + final int maxDoc = context.reader.maxDoc(); + return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) { @Override - public int docID() { - return doc; + boolean matchDoc(int doc) { + return bits.get(fcsi.getOrd(doc)); } - - @Override - public int nextDoc() { - try { - while (!bits.get(fcsi.getOrd(++doc))) {} - } catch (ArrayIndexOutOfBoundsException e) { - doc = NO_MORE_DOCS; - } - return doc; - } - - @Override - public int advance(int target) { - try { - doc = target; - while (!bits.get(fcsi.getOrd(doc))) { - doc++; - } - } catch (ArrayIndexOutOfBoundsException e) { - doc = NO_MORE_DOCS; - } - return doc; - } - } + }; } } Index: lucene/src/java/org/apache/lucene/search/Filter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/Filter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/Filter.java (working copy) @@ -21,7 +21,7 @@ import org.apache.lucene.index.IndexReader; // javadocs import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.Bits; /** * Abstract base class for restricting which documents may @@ -44,14 +44,16 @@ * represent the whole underlying index i.e. if the index has more than * one segment the given reader only represents a single segment. * The provided context is always an atomic context, so you can call - * {@link IndexReader#fields()} or {@link IndexReader#getLiveDocs()} + * {@link IndexReader#fields()} * on the context's reader, for example. + * + * @param acceptDocs + * Bits that represent the allowable docs to match (typically deleted docs + * but possibly filtering other documents) * * @return a DocIdSet that provides the documents which should be permitted or * prohibited in search results. NOTE: null can be returned if * no documents will be accepted by this Filter. - * - * @see DocIdBitSet */ - public abstract DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException; + public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException; } Index: lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/FilteredDocIdSet.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import org.apache.lucene.util.Bits; /** * Abstract decorator class for a DocIdSet implementation @@ -54,14 +55,28 @@ public boolean isCacheable() { return _innerSet.isCacheable(); } + + @Override + public Bits bits() throws IOException { + final Bits bits = _innerSet.bits(); + return (bits == null) ? null : new Bits() { + public boolean get(int docid) { + return bits.get(docid) && FilteredDocIdSet.this.match(docid); + } + public int length() { + return bits.length(); + } + }; + } + /** * Validation method to determine whether a docid should be in the result set. * @param docid docid to be tested * @return true if input docid should be in the result set, false otherwise. */ - protected abstract boolean match(int docid) throws IOException; - + protected abstract boolean match(int docid); + /** * Implementation of the contract to build a DocIdSetIterator. * @see DocIdSetIterator @@ -71,7 +86,7 @@ public DocIdSetIterator iterator() throws IOException { return new FilteredDocIdSetIterator(_innerSet.iterator()) { @Override - protected boolean match(int docid) throws IOException { + protected boolean match(int docid) { return FilteredDocIdSet.this.match(docid); } }; Index: lucene/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java (working copy) @@ -47,7 +47,7 @@ * @return true if input docid should be in the result set, false otherwise. * @see #FilteredDocIdSetIterator(DocIdSetIterator) */ - abstract protected boolean match(int doc) throws IOException; + protected abstract boolean match(int doc); @Override public int docID() { Index: lucene/src/java/org/apache/lucene/search/FilteredQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/FilteredQuery.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/FilteredQuery.java (working copy) @@ -79,7 +79,7 @@ public Explanation explain (AtomicReaderContext ir, int i) throws IOException { Explanation inner = weight.explain (ir, i); Filter f = FilteredQuery.this.filter; - DocIdSet docIdSet = f.getDocIdSet(ir); + DocIdSet docIdSet = f.getDocIdSet(ir, ir.reader.getLiveDocs()); DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSet.EMPTY_DOCIDSET.iterator() : docIdSet.iterator(); if (docIdSetIterator == null) { docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator(); @@ -100,60 +100,102 @@ // return a filtering scorer @Override - public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, - boolean topScorer, Bits acceptDocs) - throws IOException { - // we will advance() the subscorer - final Scorer scorer = weight.scorer(context, true, false, acceptDocs); - if (scorer == null) { + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + assert filter != null; + + final DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); + if (filterDocIdSet == null) { + // this means the filter does not accept any documents. return null; } - DocIdSet docIdSet = filter.getDocIdSet(context); - if (docIdSet == null) { + + final DocIdSetIterator filterIter = filterDocIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. return null; } - final DocIdSetIterator docIdSetIterator = docIdSet.iterator(); - if (docIdSetIterator == null) { - return null; - } - return new Scorer(this) { + final int firstFilterDoc = filterIter.nextDoc(); + final Bits filterAcceptDocs = filterDocIdSet.bits(); + final boolean useRandomAccess = (filterAcceptDocs != null && firstFilterDoc < searcher.getFilterRandomAccessThreshold()); - private int doc = -1; - - private int advanceToCommon(int scorerDoc, int disiDoc) throws IOException { - while (scorerDoc != disiDoc) { - if (scorerDoc < disiDoc) { - scorerDoc = scorer.advance(disiDoc); - } else { - disiDoc = docIdSetIterator.advance(scorerDoc); + if (useRandomAccess) { + // if we are using random access, we return the inner scorer, just with other acceptDocs + return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs); + } else { + assert firstFilterDoc > -1; + // we are gonna advance() this scorer, so we set inorder=true/toplevel=false + // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice + final Scorer scorer = weight.scorer(context, true, false, null); + return (scorer == null) ? null : new Scorer(this) { + private int scorerDoc = -1, filterDoc = firstFilterDoc; + + // optimization: we are topScorer and collect directly using short-circuited algo + @Override + public void score(Collector collector) throws IOException { + int filterDoc = firstFilterDoc; + int scorerDoc = scorer.advance(filterDoc); + // the normalization trick already applies the boost of this query, + // so we can use the wrapped scorer directly: + collector.setScorer(scorer); + for (;;) { + if (scorerDoc == filterDoc) { + // Check if scorer has exhausted, only before collecting. + if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + collector.collect(scorerDoc); + filterDoc = filterIter.nextDoc(); + scorerDoc = scorer.advance(filterDoc); + } else if (scorerDoc > filterDoc) { + filterDoc = filterIter.advance(scorerDoc); + } else { + scorerDoc = scorer.advance(filterDoc); + } } } - return scorerDoc; - } + + private int advanceToNextCommonDoc() throws IOException { + for (;;) { + if (scorerDoc < filterDoc) { + scorerDoc = scorer.advance(filterDoc); + } else if (scorerDoc == filterDoc) { + return scorerDoc; + } else { + filterDoc = filterIter.advance(scorerDoc); + } + } + } - @Override - public int nextDoc() throws IOException { - int scorerDoc, disiDoc; - return doc = (disiDoc = docIdSetIterator.nextDoc()) != NO_MORE_DOCS - && (scorerDoc = scorer.nextDoc()) != NO_MORE_DOCS - && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS ? scorer.docID() : NO_MORE_DOCS; - } - - @Override - public int docID() { return doc; } - - @Override - public int advance(int target) throws IOException { - int disiDoc, scorerDoc; - return doc = (disiDoc = docIdSetIterator.advance(target)) != NO_MORE_DOCS - && (scorerDoc = scorer.advance(disiDoc)) != NO_MORE_DOCS - && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS ? scorer.docID() : NO_MORE_DOCS; - } + @Override + public int nextDoc() throws IOException { + // don't go to next doc on first call + // (because filterIter is already on first doc): + if (scorerDoc != -1) { + filterDoc = filterIter.nextDoc(); + } + return advanceToNextCommonDoc(); + } + + @Override + public int advance(int target) throws IOException { + if (target > filterDoc) { + filterDoc = filterIter.advance(target); + } + return advanceToNextCommonDoc(); + } - @Override - public float score() throws IOException { return scorer.score(); } - }; + @Override + public int docID() { + return scorerDoc; + } + + @Override + public float score() throws IOException { + return scorer.score(); + } + }; + } } }; } Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -41,6 +41,7 @@ import org.apache.lucene.search.similarities.SimilarityProvider; import org.apache.lucene.store.Directory; import org.apache.lucene.store.NIOFSDirectory; // javadoc +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ThreadInterruptedException; @@ -273,6 +274,10 @@ reader.close(); } } + + private static Query wrapFilter(Query query, Filter filter) { + return (filter == null) ? query : new FilteredQuery(query, filter); + } /** Finds the top n * hits for query where all results are after a previous @@ -285,7 +290,7 @@ * @throws BooleanQuery.TooManyClauses */ public TopDocs searchAfter(ScoreDoc after, Query query, int n) throws IOException { - return searchAfter(after, query, null, n); + return search(createNormalizedWeight(query), after, n); } /** Finds the top n @@ -299,7 +304,7 @@ * @throws BooleanQuery.TooManyClauses */ public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n) throws IOException { - return search(createNormalizedWeight(query), filter, after, n); + return search(createNormalizedWeight(wrapFilter(query, filter)), after, n); } /** Finds the top n @@ -320,7 +325,7 @@ */ public TopDocs search(Query query, Filter filter, int n) throws IOException { - return search(createNormalizedWeight(query), filter, null, n); + return search(createNormalizedWeight(wrapFilter(query, filter)), null, n); } /** Lower-level search API. @@ -341,7 +346,7 @@ */ public void search(Query query, Filter filter, Collector results) throws IOException { - search(leafContexts, createNormalizedWeight(query), filter, results); + search(leafContexts, createNormalizedWeight(wrapFilter(query, filter)), results); } /** Lower-level search API. @@ -359,7 +364,7 @@ */ public void search(Query query, Collector results) throws IOException { - search(leafContexts, createNormalizedWeight(query), null, results); + search(leafContexts, createNormalizedWeight(query), results); } /** Search implementation with arbitrary sorting. Finds @@ -375,7 +380,7 @@ */ public TopFieldDocs search(Query query, Filter filter, int n, Sort sort) throws IOException { - return search(createNormalizedWeight(query), filter, n, sort); + return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort); } /** @@ -388,7 +393,7 @@ */ public TopFieldDocs search(Query query, int n, Sort sort) throws IOException { - return search(createNormalizedWeight(query), null, n, sort); + return search(createNormalizedWeight(query), n, sort); } /** Expert: Low-level search implementation. Finds the top n @@ -398,9 +403,9 @@ * {@link IndexSearcher#search(Query,Filter,int)} instead. * @throws BooleanQuery.TooManyClauses */ - protected TopDocs search(Weight weight, Filter filter, ScoreDoc after, int nDocs) throws IOException { + protected TopDocs search(Weight weight, ScoreDoc after, int nDocs) throws IOException { if (executor == null) { - return search(leafContexts, weight, filter, after, nDocs); + return search(leafContexts, weight, after, nDocs); } else { final HitQueue hq = new HitQueue(nDocs, false); final Lock lock = new ReentrantLock(); @@ -408,7 +413,7 @@ for (int i = 0; i < leafSlices.length; i++) { // search each sub runner.submit( - new SearcherCallableNoSort(lock, this, leafSlices[i], weight, filter, after, nDocs, hq)); + new SearcherCallableNoSort(lock, this, leafSlices[i], weight, after, nDocs, hq)); } int totalHits = 0; @@ -429,13 +434,13 @@ } /** Expert: Low-level search implementation. Finds the top n - * hits for query, using the given leaf readers applying filter if non-null. + * hits for query. * *

Applications should usually call {@link IndexSearcher#search(Query,int)} or * {@link IndexSearcher#search(Query,Filter,int)} instead. * @throws BooleanQuery.TooManyClauses */ - protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, ScoreDoc after, int nDocs) throws IOException { + protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, ScoreDoc after, int nDocs) throws IOException { // single thread int limit = reader.maxDoc(); if (limit == 0) { @@ -443,37 +448,36 @@ } nDocs = Math.min(nDocs, limit); TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, after, !weight.scoresDocsOutOfOrder()); - search(leaves, weight, filter, collector); + search(leaves, weight, collector); return collector.topDocs(); } /** Expert: Low-level search implementation with arbitrary sorting. Finds - * the top n hits for query, applying - * filter if non-null, and sorting the hits by the criteria in - * sort. + * the top n hits for query and sorting the hits + * by the criteria in sort. * *

Applications should usually call {@link * IndexSearcher#search(Query,Filter,int,Sort)} instead. * * @throws BooleanQuery.TooManyClauses */ - protected TopFieldDocs search(Weight weight, Filter filter, + protected TopFieldDocs search(Weight weight, final int nDocs, Sort sort) throws IOException { - return search(weight, filter, nDocs, sort, true); + return search(weight, nDocs, sort, true); } /** - * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose + * Just like {@link #search(Weight, int, Sort)}, but you choose * whether or not the fields in the returned {@link FieldDoc} instances should * be set by specifying fillFields. * *

NOTE: this does not compute scores by default. If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, * Collector)}.

*/ - protected TopFieldDocs search(Weight weight, Filter filter, int nDocs, + protected TopFieldDocs search(Weight weight, int nDocs, Sort sort, boolean fillFields) throws IOException { @@ -481,7 +485,7 @@ if (executor == null) { // use all leaves here! - return search (leafContexts, weight, filter, nDocs, sort, fillFields); + return search (leafContexts, weight, nDocs, sort, fillFields); } else { final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs, fillFields, @@ -493,7 +497,7 @@ final ExecutionHelper runner = new ExecutionHelper(executor); for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice runner.submit( - new SearcherCallableWithSort(lock, this, leafSlices[i], weight, filter, nDocs, topCollector, sort)); + new SearcherCallableWithSort(lock, this, leafSlices[i], weight, nDocs, topCollector, sort)); } int totalHits = 0; float maxScore = Float.NEGATIVE_INFINITY; @@ -512,17 +516,17 @@ /** - * Just like {@link #search(Weight, Filter, int, Sort)}, but you choose + * Just like {@link #search(Weight, int, Sort)}, but you choose * whether or not the fields in the returned {@link FieldDoc} instances should * be set by specifying fillFields. * *

NOTE: this does not compute scores by default. If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, * Collector)}.

*/ - protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs, + protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, int nDocs, Sort sort, boolean fillFields) throws IOException { // single thread int limit = reader.maxDoc(); @@ -533,7 +537,7 @@ TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.scoresDocsOutOfOrder()); - search(leaves, weight, filter, collector); + search(leaves, weight, collector); return (TopFieldDocs) collector.topDocs(); } @@ -557,80 +561,50 @@ * the searchers leaves to execute the searches on * @param weight * to match documents - * @param filter - * if non-null, used to permit documents to be collected. * @param collector * to receive hits * @throws BooleanQuery.TooManyClauses */ - protected void search(AtomicReaderContext[] leaves, Weight weight, Filter filter, Collector collector) + protected void search(AtomicReaderContext[] leaves, Weight weight, Collector collector) throws IOException { // TODO: should we make this // threaded...? the Collector could be sync'd? // always use single thread: - if (filter == null) { - for (int i = 0; i < leaves.length; i++) { // search each subreader - collector.setNextReader(leaves[i]); - Scorer scorer = weight.scorer(leaves[i], !collector.acceptsDocsOutOfOrder(), true, leaves[i].reader.getLiveDocs()); - if (scorer != null) { - scorer.score(collector); - } + for (int i = 0; i < leaves.length; i++) { // search each subreader + collector.setNextReader(leaves[i]); + Scorer scorer = weight.scorer(leaves[i], !collector.acceptsDocsOutOfOrder(), true, leaves[i].reader.getLiveDocs()); + if (scorer != null) { + scorer.score(collector); } - } else { - for (int i = 0; i < leaves.length; i++) { // search each subreader - collector.setNextReader(leaves[i]); - searchWithFilter(leaves[i], weight, filter, collector); - } } } - private void searchWithFilter(AtomicReaderContext context, Weight weight, - final Filter filter, final Collector collector) throws IOException { + private int filterRandomAccessThreshold = 100; - assert filter != null; - - // we are gonna advance() this scorer, so we set inorder=true/toplevel=false - Scorer scorer = weight.scorer(context, true, false, context.reader.getLiveDocs()); - if (scorer == null) { - return; - } - - int docID = scorer.docID(); - assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; - - // CHECKME: use ConjunctionScorer here? - DocIdSet filterDocIdSet = filter.getDocIdSet(context); - if (filterDocIdSet == null) { - // this means the filter does not accept any documents. - return; - } - - DocIdSetIterator filterIter = filterDocIdSet.iterator(); - if (filterIter == null) { - // this means the filter does not accept any documents. - return; - } - int filterDoc = filterIter.nextDoc(); - int scorerDoc = scorer.advance(filterDoc); - - collector.setScorer(scorer); - while (true) { - if (scorerDoc == filterDoc) { - // Check if scorer has exhausted, only before collecting. - if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - collector.collect(scorerDoc); - filterDoc = filterIter.nextDoc(); - scorerDoc = scorer.advance(filterDoc); - } else if (scorerDoc > filterDoc) { - filterDoc = filterIter.advance(scorerDoc); - } else { - scorerDoc = scorer.advance(filterDoc); - } - } + /** + * Expert: + * + * @return Threshold used to heuristics to determine if a Filter is dense or sparse. + * @see #setFilterRandomAccessThreshold(int) + * @lucene.experimental + */ + public int getFilterRandomAccessThreshold() { + return filterRandomAccessThreshold; } + + /** + * Expert: Sets the threshold used in the heuristics to determine if a + * Filter is dense or sparse (and therefore whether random-access should + * be used or not). If a document is found in the Filter beneath this + * threshold, it is assumed to be dense. + * + * @param value Threshold to be used in this IndexSearcher + * @lucene.experimental + */ + public void setFilterRandomAccessThreshold(int value) { + filterRandomAccessThreshold = value; + } /** Expert: called to re-write queries into primitive queries. * @throws BooleanQuery.TooManyClauses @@ -729,18 +703,16 @@ private final Lock lock; private final IndexSearcher searcher; private final Weight weight; - private final Filter filter; private final ScoreDoc after; private final int nDocs; private final HitQueue hq; private final LeafSlice slice; public SearcherCallableNoSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, - Filter filter, ScoreDoc after, int nDocs, HitQueue hq) { + ScoreDoc after, int nDocs, HitQueue hq) { this.lock = lock; this.searcher = searcher; this.weight = weight; - this.filter = filter; this.after = after; this.nDocs = nDocs; this.hq = hq; @@ -748,7 +720,7 @@ } public TopDocs call() throws IOException { - final TopDocs docs = searcher.search (slice.leaves, weight, filter, after, nDocs); + final TopDocs docs = searcher.search (slice.leaves, weight, after, nDocs); final ScoreDoc[] scoreDocs = docs.scoreDocs; //it would be so nice if we had a thread-safe insert lock.lock(); @@ -775,18 +747,16 @@ private final Lock lock; private final IndexSearcher searcher; private final Weight weight; - private final Filter filter; private final int nDocs; private final TopFieldCollector hq; private final Sort sort; private final LeafSlice slice; public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, - Filter filter, int nDocs, TopFieldCollector hq, Sort sort) { + int nDocs, TopFieldCollector hq, Sort sort) { this.lock = lock; this.searcher = searcher; this.weight = weight; - this.filter = filter; this.nDocs = nDocs; this.hq = hq; this.sort = sort; @@ -831,7 +801,7 @@ public TopFieldDocs call() throws IOException { assert slice.leaves.length == 1; - final TopFieldDocs docs = searcher.search (slice.leaves, weight, filter, nDocs, sort, true); + final TopFieldDocs docs = searcher.search (slice.leaves, weight, nDocs, sort, true); lock.lock(); try { final int base = slice.leaves[0].docBase; Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (working copy) @@ -105,7 +105,7 @@ * results. */ @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final IndexReader reader = context.reader; final Fields fields = reader.fields(); if (fields == null) { @@ -125,13 +125,12 @@ // fill into a FixedBitSet final FixedBitSet bitSet = new FixedBitSet(context.reader.maxDoc()); int termCount = 0; - final Bits liveDocs = reader.getLiveDocs(); DocsEnum docsEnum = null; do { termCount++; // System.out.println(" iter termCount=" + termCount + " term=" + // enumerator.term().toBytesString()); - docsEnum = termsEnum.docs(liveDocs, docsEnum); + docsEnum = termsEnum.docs(acceptDocs, docsEnum); final DocsEnum.BulkReadResult result = docsEnum.getBulkResult(); while (true) { final int count = docsEnum.read(); Index: lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.util.Bits; /** * Constrains search results to only match those which also match a provided @@ -47,7 +48,7 @@ } @Override - public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { // get a private context that is used to rewrite, createWeight and score eventually assert context.reader.getTopReaderContext().isAtomic; final AtomicReaderContext privateContext = (AtomicReaderContext) context.reader.getTopReaderContext(); @@ -55,7 +56,7 @@ return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return weight.scorer(privateContext, true, false, privateContext.reader.getLiveDocs()); + return weight.scorer(privateContext, true, false, acceptDocs); } @Override public boolean isCacheable() { return false; } Index: lucene/src/java/org/apache/lucene/search/SpanFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SpanFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/SpanFilter.java (working copy) @@ -16,6 +16,7 @@ */ import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.util.Bits; import java.io.IOException; @@ -34,5 +35,5 @@ * @return A {@link SpanFilterResult} * @throws java.io.IOException if there was an issue accessing the necessary information * */ - public abstract SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException; + public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException; } Index: lucene/src/java/org/apache/lucene/search/SpanFilterResult.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SpanFilterResult.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/SpanFilterResult.java (working copy) @@ -16,7 +16,7 @@ */ import java.util.ArrayList; - +import java.util.Collections; import java.util.List; @@ -30,6 +30,9 @@ private DocIdSet docIdSet; private List positions;//Spans spans; + public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT = + new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.emptyList()); + /** * * @param docIdSet The DocIdSet for the Filter Index: lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java =================================================================== --- lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java (working copy) @@ -19,6 +19,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.Spans; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import java.io.IOException; @@ -52,16 +53,16 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - SpanFilterResult result = bitSpans(context); + public final DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + SpanFilterResult result = bitSpans(context, acceptDocs); return result.getDocIdSet(); } @Override - public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { + public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc()); - Spans spans = query.getSpans(context, context.reader.getLiveDocs()); + Spans spans = query.getSpans(context, acceptDocs); List tmp = new ArrayList(20); int currentDoc = -1; SpanFilterResult.PositionInfo currentInfo = null; Index: lucene/src/java/org/apache/lucene/util/DocIdBitSet.java =================================================================== --- lucene/src/java/org/apache/lucene/util/DocIdBitSet.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/util/DocIdBitSet.java (working copy) @@ -24,8 +24,8 @@ /** Simple DocIdSet and DocIdSetIterator backed by a BitSet */ -public class DocIdBitSet extends DocIdSet { - private BitSet bitSet; +public class DocIdBitSet extends DocIdSet implements Bits { + private final BitSet bitSet; public DocIdBitSet(BitSet bitSet) { this.bitSet = bitSet; @@ -36,6 +36,11 @@ return new DocIdBitSetIterator(bitSet); } + @Override + public Bits bits() { + return this; + } + /** This DocIdSet implementation is cacheable. */ @Override public boolean isCacheable() { @@ -46,9 +51,20 @@ * Returns the underlying BitSet. */ public BitSet getBitSet() { - return this.bitSet; + return this.bitSet; } + @Override + public boolean get(int index) { + return bitSet.get(index); + } + + @Override + public int length() { + // the size may not be correct... + return bitSet.size(); + } + private static class DocIdBitSetIterator extends DocIdSetIterator { private int docId; private BitSet bitSet; Index: lucene/src/java/org/apache/lucene/util/FixedBitSet.java =================================================================== --- lucene/src/java/org/apache/lucene/util/FixedBitSet.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/util/FixedBitSet.java (working copy) @@ -67,6 +67,11 @@ } @Override + public Bits bits() { + return this; + } + + @Override public int length() { return numBits; } Index: lucene/src/java/org/apache/lucene/util/OpenBitSet.java =================================================================== --- lucene/src/java/org/apache/lucene/util/OpenBitSet.java (revision 1180003) +++ lucene/src/java/org/apache/lucene/util/OpenBitSet.java (working copy) @@ -119,6 +119,11 @@ return new OpenBitSetIterator(bits, wlen); } + @Override + public Bits bits() { + return this; + } + /** This DocIdSet implementation is cacheable. */ @Override public boolean isCacheable() { Index: lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java (revision 1180003) +++ lucene/src/test-framework/org/apache/lucene/search/CachingWrapperFilterHelper.java (working copy) @@ -22,6 +22,7 @@ import junit.framework.Assert; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.util.Bits; /** * A unit test helper class to test when the filter is getting cached and when it is not. @@ -42,10 +43,10 @@ } @Override - public synchronized DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public synchronized DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { final int saveMissCount = missCount; - DocIdSet docIdSet = super.getDocIdSet(context); + DocIdSet docIdSet = super.getDocIdSet(context, acceptDocs); if (shouldHaveCache) { Assert.assertEquals("Cache should have data ", saveMissCount, missCount); Index: lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java =================================================================== --- lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (revision 1180003) +++ lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java (working copy) @@ -1302,6 +1302,7 @@ } IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(r) : new AssertingIndexSearcher(r.getTopReaderContext()); ret.setSimilarityProvider(similarityProvider); + ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200)); return ret; } else { int threads = 0; @@ -1326,6 +1327,7 @@ } }; ret.setSimilarityProvider(similarityProvider); + ret.setFilterRandomAccessThreshold(_TestUtil.nextInt(random, 1, 200)); return ret; } } Index: lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java =================================================================== --- lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OpenBitSet; @@ -109,7 +110,7 @@ * results. */ @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader, query.field); // Cannot use FixedBitSet because we require long index (ord): final OpenBitSet termSet = new OpenBitSet(fcsi.numOrd()); @@ -158,7 +159,8 @@ return DocIdSet.EMPTY_DOCIDSET; } - return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) { + final int maxDoc = context.reader.maxDoc(); + return new FieldCacheRangeFilter.FieldCacheDocIdSet(maxDoc, acceptDocs) { @Override boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException { return termSet.get(fcsi.getOrd(doc)); Index: lucene/src/test/org/apache/lucene/search/JustCompileSearch.java =================================================================== --- lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/JustCompileSearch.java (working copy) @@ -160,7 +160,7 @@ // still added here in case someone will add abstract methods in the future. @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return null; } } @@ -288,12 +288,12 @@ static final class JustCompileSpanFilter extends SpanFilter { @Override - public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException { + public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return null; } } Index: lucene/src/test/org/apache/lucene/search/MockFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/MockFilter.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/MockFilter.java (working copy) @@ -19,15 +19,16 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.DocIdBitSet; -import java.util.BitSet; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.Bits; public class MockFilter extends Filter { private boolean wasCalled; @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) { wasCalled = true; - return new DocIdBitSet(new BitSet()); + return new FixedBitSet(context.reader.maxDoc()); } public void clear() { Index: lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/SingleDocTestFilter.java (working copy) @@ -18,9 +18,9 @@ */ import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.util.DocIdBitSet; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.FixedBitSet; -import java.util.BitSet; import java.io.IOException; public class SingleDocTestFilter extends Filter { @@ -31,9 +31,10 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - BitSet bits = new BitSet(context.reader.maxDoc()); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + FixedBitSet bits = new FixedBitSet(context.reader.maxDoc()); bits.set(doc); - return new DocIdBitSet(bits); + if (acceptDocs != null && !acceptDocs.get(doc)) bits.clear(doc); + return bits; } } Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java (working copy) @@ -32,8 +32,9 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +@org.junit.Ignore("nocommit: Test disabled") public class TestCachingSpanFilter extends LuceneTestCase { - +/* public void testEnforceDeletions() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter( @@ -87,13 +88,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs: docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); @@ -160,4 +163,5 @@ return oldReader; } } + */ } Index: lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy) @@ -33,8 +33,9 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util._TestUtil; +@org.junit.Ignore("nocommit: Test disabled") public class TestCachingWrapperFilter extends LuceneTestCase { - + /* public void testCachingWorks() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); @@ -192,6 +193,8 @@ docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); + + // ignore deletes ConstantScoreQuery constantScore = new ConstantScoreQuery(filter); docs = searcher.search(constantScore, 1); assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); @@ -204,13 +207,15 @@ searcher.close(); searcher = newSearcher(reader, false); + // no hit because IS ANDs del docs docs = searcher.search(new MatchAllDocsQuery(), filter, 1); assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); + // no hit because CSQ realizes wrapped filter includes + // non-live docs and ANDs the live docs on the fly: docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); + assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits); - // force cache to regenerate: filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE); @@ -312,4 +317,5 @@ return oldReader; } } + */ } Index: lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java (working copy) @@ -130,5 +130,32 @@ if (directory != null) directory.close(); } } + + public void testConstantScoreQueryAndFilter() throws Exception { + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random, d); + Document doc = new Document(); + doc.add(newField("field", "a", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + doc = new Document(); + doc.add(newField("field", "b", StringField.TYPE_UNSTORED)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b")))); + Query query = new ConstantScoreQuery(filterB); + + IndexSearcher s = new IndexSearcher(r); + assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b + + Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a")))); + query = new ConstantScoreQuery(filterA); + + assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:b, Filter field:a + + r.close(); + d.close(); + } } Index: lucene/src/test/org/apache/lucene/search/TestDocIdSet.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestDocIdSet.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestDocIdSet.java (working copy) @@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; public class TestDocIdSet extends LuceneTestCase { @@ -114,7 +115,7 @@ // Now search w/ a Filter which returns a null DocIdSet Filter f = new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return null; } }; Index: lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java (working copy) @@ -28,6 +28,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.DocIdBitSet; import org.apache.lucene.util.LuceneTestCase; @@ -82,6 +83,7 @@ writer.close (); searcher = newSearcher(reader); + query = new TermQuery (new Term ("field", "three")); filter = newStaticFilterB(); } @@ -90,10 +92,11 @@ private static Filter newStaticFilterB() { return new Filter() { @Override - public DocIdSet getDocIdSet (AtomicReaderContext context) { + public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { + if (acceptDocs == null) acceptDocs = new Bits.MatchAllBits(5); BitSet bitset = new BitSet(5); - bitset.set (1); - bitset.set (3); + if (acceptDocs.get(1)) bitset.set(1); + if (acceptDocs.get(3)) bitset.set(3); return new DocIdBitSet(bitset); } }; @@ -106,9 +109,17 @@ directory.close(); super.tearDown(); } + + public void testFilteredQuery() throws Exception { + // force the filter to be executed as bits + searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + tFilteredQuery(); + // force the filter to be executed as iterator + searcher.setFilterRandomAccessThreshold(0); + tFilteredQuery(); + } - public void testFilteredQuery() - throws Exception { + private void tFilteredQuery() throws Exception { Query filteredquery = new FilteredQuery (query, filter); ScoreDoc[] hits = searcher.search (filteredquery, null, 1000).scoreDocs; assertEquals (1, hits.length); @@ -161,7 +172,8 @@ private static Filter newStaticFilterA() { return new Filter() { @Override - public DocIdSet getDocIdSet (AtomicReaderContext context) { + public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { + assertNull("acceptDocs should be null, as we have an index without deletions", acceptDocs); BitSet bitset = new BitSet(5); bitset.set(0, 5); return new DocIdBitSet(bitset); @@ -187,6 +199,15 @@ * This tests FilteredQuery's rewrite correctness */ public void testRangeQuery() throws Exception { + // force the filter to be executed as bits + searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + tRangeQuery(); + // force the filter to be executed as iterator + searcher.setFilterRandomAccessThreshold(0); + tRangeQuery(); + } + + private void tRangeQuery() throws Exception { TermRangeQuery rq = TermRangeQuery.newStringRange( "sorter", "b", "d", true, true); @@ -196,7 +217,16 @@ QueryUtils.check(random, filteredquery,searcher); } - public void testBoolean() throws Exception { + public void testBooleanMUST() throws Exception { + // force the filter to be executed as bits + searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + tBooleanMUST(); + // force the filter to be executed as iterator + searcher.setFilterRandomAccessThreshold(0); + tBooleanMUST(); + } + + private void tBooleanMUST() throws Exception { BooleanQuery bq = new BooleanQuery(); Query query = new FilteredQuery(new MatchAllDocsQuery(), new SingleDocTestFilter(0)); @@ -209,9 +239,40 @@ QueryUtils.check(random, query,searcher); } + public void testBooleanSHOULD() throws Exception { + // force the filter to be executed as bits + searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + tBooleanSHOULD(); + // force the filter to be executed as iterator + searcher.setFilterRandomAccessThreshold(0); + tBooleanSHOULD(); + } + + private void tBooleanSHOULD() throws Exception { + BooleanQuery bq = new BooleanQuery(); + Query query = new FilteredQuery(new MatchAllDocsQuery(), + new SingleDocTestFilter(0)); + bq.add(query, BooleanClause.Occur.SHOULD); + query = new FilteredQuery(new MatchAllDocsQuery(), + new SingleDocTestFilter(1)); + bq.add(query, BooleanClause.Occur.SHOULD); + ScoreDoc[] hits = searcher.search(bq, null, 1000).scoreDocs; + assertEquals(2, hits.length); + QueryUtils.check(random, query,searcher); + } + // Make sure BooleanQuery, which does out-of-order // scoring, inside FilteredQuery, works public void testBoolean2() throws Exception { + // force the filter to be executed as bits + searcher.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + tBoolean2(); + // force the filter to be executed as iterator + searcher.setFilterRandomAccessThreshold(0); + tBoolean2(); + } + + private void tBoolean2() throws Exception { BooleanQuery bq = new BooleanQuery(); Query query = new FilteredQuery(bq, new SingleDocTestFilter(0)); Index: lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java (working copy) @@ -23,13 +23,17 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; @@ -95,7 +99,8 @@ } @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) { + public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { + assertNull("acceptDocs should be null, as we have an index without deletions", acceptDocs); assert context.isAtomic; final FixedBitSet set = new FixedBitSet(context.reader.maxDoc()); int docBase = context.docBase; @@ -116,4 +121,56 @@ } } + /** when a filter is executed random access, make sure + * we get BucketScorer + */ + public void testBS1WithRandomAccessFilter() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter iw = new RandomIndexWriter(random, dir); + Document d = new Document(); + d.add(newField("foo", "bar", TextField.TYPE_STORED)); + iw.addDocument(d); + d = new Document(); + d.add(newField("foo", "baz", TextField.TYPE_STORED)); + iw.addDocument(d); + + IndexReader ir = iw.getReader(); + iw.close(); + + IndexSearcher is = newSearcher(ir); + + // force the filter to be executed as bits + is.setFilterRandomAccessThreshold(Integer.MAX_VALUE); + + // for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our scorer supports out-of order collection! + final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer"; + BooleanQuery bq = new BooleanQuery(); + bq.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD); + bq.add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD); + is.search(bq, new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("foo", "bar")))), + new Collector() { + + @Override + public void setScorer(Scorer scorer) throws IOException { + assertEquals(bucketScorerClass, scorer.getClass().getName()); + } + + @Override + public void collect(int doc) throws IOException { + } + + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + } + ); + is.close(); + ir.close(); + dir.close(); + } } Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -182,13 +182,13 @@ public void testInverseRange() throws Exception { AtomicReaderContext context = (AtomicReaderContext) new SlowMultiReaderWrapper(searcher.getIndexReader()).getTopReaderContext(); NumericRangeFilter f = NumericRangeFilter.newIntRange("field8", 8, 1000, -1000, true, true); - assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); + assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context, context.reader.getLiveDocs())); f = NumericRangeFilter.newIntRange("field8", 8, Integer.MAX_VALUE, null, false, false); assertSame("A exclusive range starting with Integer.MAX_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context, context.reader.getLiveDocs())); f = NumericRangeFilter.newIntRange("field8", 8, null, Integer.MIN_VALUE, false, false); assertSame("A exclusive range ending with Integer.MIN_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context, context.reader.getLiveDocs())); } @Test Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -188,13 +188,13 @@ AtomicReaderContext context = (AtomicReaderContext) new SlowMultiReaderWrapper(searcher.getIndexReader()).getTopReaderContext(); NumericRangeFilter f = NumericRangeFilter.newLongRange("field8", 8, 1000L, -1000L, true, true); assertSame("A inverse range should return the EMPTY_DOCIDSET instance", DocIdSet.EMPTY_DOCIDSET, - f.getDocIdSet(context)); + f.getDocIdSet(context, context.reader.getLiveDocs())); f = NumericRangeFilter.newLongRange("field8", 8, Long.MAX_VALUE, null, false, false); assertSame("A exclusive range starting with Long.MAX_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context, context.reader.getLiveDocs())); f = NumericRangeFilter.newLongRange("field8", 8, null, Long.MIN_VALUE, false, false); assertSame("A exclusive range ending with Long.MIN_VALUE should return the EMPTY_DOCIDSET instance", - DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context)); + DocIdSet.EMPTY_DOCIDSET, f.getDocIdSet(context, context.reader.getLiveDocs())); } @Test Index: lucene/src/test/org/apache/lucene/search/TestScorerPerf.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestScorerPerf.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestScorerPerf.java (working copy) @@ -1,5 +1,6 @@ package org.apache.lucene.search; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.DocIdBitSet; import org.apache.lucene.util.LuceneTestCase; @@ -141,7 +142,8 @@ final BitSet rnd = sets[random.nextInt(sets.length)]; Query q = new ConstantScoreQuery(new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) { + public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { + assertNull("acceptDocs should be null, as we have an index without deletions", acceptDocs); return new DocIdBitSet(rnd); } }); Index: lucene/src/test/org/apache/lucene/search/TestSort.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSort.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestSort.java (working copy) @@ -53,6 +53,7 @@ import org.apache.lucene.search.cache.ShortValuesCreator; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.DocIdBitSet; import org.apache.lucene.util.LuceneTestCase; @@ -730,7 +731,8 @@ // a filter that only allows through the first hit Filter filt = new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet (AtomicReaderContext context, Bits acceptDocs) { + assertNull("acceptDocs should be null, as we have no deletions", acceptDocs); BitSet bs = new BitSet(context.reader.maxDoc()); bs.set(0, context.reader.maxDoc()); bs.set(docs1.scoreDocs[0].doc); Index: lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java (working copy) @@ -49,7 +49,7 @@ int subIndex = ReaderUtil.subIndex(number, leaves); // find the reader with this document in it SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(number).trim())); SpanQueryFilter filter = new SpanQueryFilter(query); - SpanFilterResult result = filter.bitSpans(leaves[subIndex]); + SpanFilterResult result = filter.bitSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs()); DocIdSet docIdSet = result.getDocIdSet(); assertTrue("docIdSet is null and it shouldn't be", docIdSet != null); assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, number - leaves[subIndex].docBase); Index: lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java (revision 1180003) +++ lucene/src/test/org/apache/lucene/search/TestTopDocsMerge.java (working copy) @@ -44,11 +44,11 @@ } public void search(Weight weight, Collector collector) throws IOException { - search(ctx, weight, null, collector); + search(ctx, weight, collector); } public TopDocs search(Weight weight, int topN) throws IOException { - return search(ctx, weight, null, null, topN); + return search(ctx, weight, null, topN); } @Override Index: modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (revision 1180003) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (working copy) @@ -505,7 +505,7 @@ subDocUpto = 0; docBase = readerContext.docBase; //System.out.println("setNextReader base=" + docBase + " r=" + readerContext.reader); - lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext).iterator(); + lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext, readerContext.reader.getLiveDocs()).iterator(); groupEndDocID = -1; currentReaderContext = readerContext; Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1180003) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (working copy) @@ -1221,11 +1221,11 @@ } public void search(Weight weight, Collector collector) throws IOException { - search(ctx, weight, null, collector); + search(ctx, weight, collector); } public TopDocs search(Weight weight, int topN) throws IOException { - return search(ctx, weight, null, null, topN); + return search(ctx, weight, null, topN); } @Override Index: modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java =================================================================== --- modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (revision 1180003) +++ modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java (working copy) @@ -163,7 +163,7 @@ return null; } - final DocIdSet parents = parentsFilter.getDocIdSet(readerContext); + final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, readerContext.reader.getLiveDocs()); // TODO: once we do random-access filters we can // generalize this: if (parents == null) { Index: modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (revision 1180003) +++ modules/queries/src/java/org/apache/lucene/queries/BooleanFilter.java (working copy) @@ -24,10 +24,12 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; /** @@ -48,7 +50,7 @@ * of the filters that have been added. */ @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { FixedBitSet res = null; final IndexReader reader = context.reader; @@ -91,12 +93,13 @@ } } - return res != null ? res : DocIdSet.EMPTY_DOCIDSET; + return res != null ? BitsFilteredDocIdSet.wrap(res, acceptDocs) : DocIdSet.EMPTY_DOCIDSET; } private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context) throws IOException { - final DocIdSet set = filter.getDocIdSet(context); + // we dont pass acceptDocs, we will filter at the end using an additional filter + final DocIdSet set = filter.getDocIdSet(context, null); return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator(); } Index: modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java (revision 1180003) +++ modules/queries/src/java/org/apache/lucene/queries/ChainedFilter.java (working copy) @@ -19,9 +19,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.OpenBitSetDISI; @@ -97,21 +99,22 @@ * {@link Filter#getDocIdSet}. */ @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { int[] index = new int[1]; // use array as reference to modifiable int; index[0] = 0; // an object attribute would not be thread safe. if (logic != -1) { - return getDocIdSet(context, logic, index); + return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logic, index), acceptDocs); } else if (logicArray != null) { - return getDocIdSet(context, logicArray, index); + return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logicArray, index), acceptDocs); } - - return getDocIdSet(context, DEFAULT, index); + + return BitsFilteredDocIdSet.wrap(getDocIdSet(context, DEFAULT, index), acceptDocs); } private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context) throws IOException { - DocIdSet docIdSet = filter.getDocIdSet(context); + // we dont pass acceptDocs, we will filter at the end using an additional filter + DocIdSet docIdSet = filter.getDocIdSet(context, null); if (docIdSet == null) { return DocIdSet.EMPTY_DOCIDSET.iterator(); } else { @@ -156,7 +159,8 @@ throws IOException { OpenBitSetDISI result = initialResult(context, logic, index); for (; index[0] < chain.length; index[0]++) { - doChain(result, logic, chain[index[0]].getDocIdSet(context)); + // we dont pass acceptDocs, we will filter at the end using an additional filter + doChain(result, logic, chain[index[0]].getDocIdSet(context, null)); } return result; } @@ -176,7 +180,8 @@ OpenBitSetDISI result = initialResult(context, logic[0], index); for (; index[0] < chain.length; index[0]++) { - doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context)); + // we dont pass acceptDocs, we will filter at the end using an additional filter + doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null)); } return result; } Index: modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java (revision 1180003) +++ modules/queries/src/java/org/apache/lucene/queries/TermsFilter.java (working copy) @@ -54,7 +54,7 @@ */ @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { IndexReader reader = context.reader; FixedBitSet result = new FixedBitSet(reader.maxDoc()); Fields fields = reader.fields(); @@ -64,7 +64,6 @@ } BytesRef br = new BytesRef(); - Bits liveDocs = reader.getLiveDocs(); String lastField = null; Terms termsC = null; TermsEnum termsEnum = null; @@ -79,7 +78,7 @@ if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for br.copy(term.bytes()); if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) { - docs = termsEnum.docs(liveDocs, docs); + docs = termsEnum.docs(acceptDocs, docs); while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { result.set(docs.docID()); } Index: modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java =================================================================== --- modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (revision 1180003) +++ modules/queries/src/test/org/apache/lucene/queries/BooleanFilterTest.java (working copy) @@ -35,6 +35,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; @@ -94,7 +95,7 @@ private Filter getNullDISFilter() { return new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) { return null; } }; @@ -103,7 +104,7 @@ private Filter getNullDISIFilter() { return new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) { return new DocIdSet() { @Override public DocIdSetIterator iterator() { @@ -122,7 +123,7 @@ private void tstFilterCard(String mes, int expected, Filter filt) throws Exception { // BooleanFilter never returns null DIS or null DISI! - DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader)).iterator(); + DocIdSetIterator disi = filt.getDocIdSet(new AtomicReaderContext(reader), reader.getLiveDocs()).iterator(); int actual = 0; while (disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { actual++; Index: modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java =================================================================== --- modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (revision 1180003) +++ modules/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (working copy) @@ -68,19 +68,19 @@ TermsFilter tf = new TermsFilter(); tf.addTerm(new Term(fieldName, "19")); - FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context); + FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs()); assertEquals("Must match nothing", 0, bits.cardinality()); tf.addTerm(new Term(fieldName, "20")); - bits = (FixedBitSet) tf.getDocIdSet(context); + bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs()); assertEquals("Must match 1", 1, bits.cardinality()); tf.addTerm(new Term(fieldName, "10")); - bits = (FixedBitSet) tf.getDocIdSet(context); + bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs()); assertEquals("Must match 2", 2, bits.cardinality()); tf.addTerm(new Term(fieldName, "00")); - bits = (FixedBitSet) tf.getDocIdSet(context); + bits = (FixedBitSet) tf.getDocIdSet(context, context.reader.getLiveDocs()); assertEquals("Must match 2", 2, bits.cardinality()); reader.close(); Index: modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java =================================================================== --- modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java (revision 1180003) +++ modules/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.queryparser.xml.DOMUtils; import org.apache.lucene.queryparser.xml.FilterBuilder; @@ -155,7 +156,7 @@ static class NoMatchFilter extends Filter { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return null; } Index: modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java =================================================================== --- modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (revision 1180003) +++ modules/queryparser/src/test/org/apache/lucene/queryparser/xml/builders/TestNumericRangeFilterBuilder.java (working copy) @@ -65,7 +65,7 @@ try { IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(ramDir, true)); try { - assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext())); + assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext(), reader.getLiveDocs())); } finally { reader.close(); Index: solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java =================================================================== --- solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (working copy) @@ -260,7 +260,7 @@ // count collection array only needs to be as big as the number of terms we are // going to collect counts for. final int[] counts = this.counts = new int[nTerms]; - DocIdSet idSet = baseSet.getDocIdSet(context); + DocIdSet idSet = baseSet.getDocIdSet(context, context.reader.getLiveDocs()); DocIdSetIterator iter = idSet.iterator(); Index: solr/core/src/java/org/apache/solr/search/DocSet.java =================================================================== --- solr/core/src/java/org/apache/solr/search/DocSet.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/DocSet.java (working copy) @@ -18,10 +18,12 @@ package org.apache.solr.search; import org.apache.solr.common.SolrException; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; @@ -270,18 +272,18 @@ return new Filter() { @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { + public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { IndexReader reader = context.reader; if (context.isTopLevel) { - return bs; + return BitsFilteredDocIdSet.wrap(bs, acceptDocs); } final int base = context.docBase; final int maxDoc = reader.maxDoc(); final int max = base + maxDoc; // one past the max doc in this segment. - return new DocIdSet() { + return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { return new DocIdSetIterator() { @@ -313,7 +315,7 @@ return true; } - }; + }, acceptDocs); } }; } Index: solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java =================================================================== --- solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java (working copy) @@ -21,7 +21,9 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.util.Bits; import org.apache.solr.search.SolrFilter; import java.io.IOException; @@ -72,13 +74,13 @@ @Override - public DocIdSet getDocIdSet(final Map context, final AtomicReaderContext readerContext) throws IOException { - return new DocIdSet() { + public DocIdSet getDocIdSet(final Map context, final AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { + return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { return valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader, lowerVal, upperVal, includeLower, includeUpper); } - }; + }, acceptDocs); } @Override Index: solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java =================================================================== --- solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (working copy) @@ -219,7 +219,7 @@ filter = resultSet.getTopFilter(); } - DocIdSet readerSet = filter.getDocIdSet(context); + DocIdSet readerSet = filter.getDocIdSet(context, context.reader.getLiveDocs()); if (readerSet == null) readerSet=DocIdSet.EMPTY_DOCIDSET; return new JoinScorer(this, readerSet.iterator(), getBoost()); } Index: solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java =================================================================== --- solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (working copy) @@ -121,13 +121,13 @@ @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - return new ConstantScorer(context, this, queryWeight); + return new ConstantScorer(context, this, queryWeight, acceptDocs); } @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - ConstantScorer cs = new ConstantScorer(context, this, queryWeight); + ConstantScorer cs = new ConstantScorer(context, this, queryWeight, context.reader.getLiveDocs()); boolean exists = cs.docIdSetIterator.advance(doc) == doc; ComplexExplanation result = new ComplexExplanation(); @@ -152,12 +152,14 @@ protected class ConstantScorer extends Scorer { final DocIdSetIterator docIdSetIterator; final float theScore; + final Bits acceptDocs; int doc = -1; - public ConstantScorer(AtomicReaderContext context, ConstantWeight w, float theScore) throws IOException { + public ConstantScorer(AtomicReaderContext context, ConstantWeight w, float theScore, Bits acceptDocs) throws IOException { super(w); this.theScore = theScore; - DocIdSet docIdSet = filter instanceof SolrFilter ? ((SolrFilter)filter).getDocIdSet(w.context, context) : filter.getDocIdSet(context); + this.acceptDocs = acceptDocs; + DocIdSet docIdSet = filter instanceof SolrFilter ? ((SolrFilter)filter).getDocIdSet(w.context, context, acceptDocs) : filter.getDocIdSet(context, acceptDocs); if (docIdSet == null) { docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator(); } else { Index: solr/core/src/java/org/apache/solr/search/SolrFilter.java =================================================================== --- solr/core/src/java/org/apache/solr/search/SolrFilter.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/SolrFilter.java (working copy) @@ -20,6 +20,7 @@ import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.util.Bits; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext; @@ -39,10 +40,10 @@ * The context object will be passed to getDocIdSet() where this info can be retrieved. */ public abstract void createWeight(Map context, IndexSearcher searcher) throws IOException; - public abstract DocIdSet getDocIdSet(Map context, AtomicReaderContext readerContext) throws IOException; + public abstract DocIdSet getDocIdSet(Map context, AtomicReaderContext readerContext, Bits acceptDocs) throws IOException; @Override - public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException { - return getDocIdSet(null, context); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return getDocIdSet(null, context, acceptDocs); } } Index: solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java =================================================================== --- solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (revision 1180003) +++ solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (working copy) @@ -178,6 +178,8 @@ fieldNames = r.getFieldNames(IndexReader.FieldOption.ALL); + // TODO: don't do this until its safe + setFilterRandomAccessThreshold(0); // do this at the end since an exception in the constructor means we won't close numOpens.incrementAndGet(); } @@ -631,9 +633,10 @@ for (int i=0; i