Index: modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java
===================================================================
--- modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java	(revision 1176852)
+++ modules/join/src/java/org/apache/lucene/search/join/BlockJoinQuery.java	(working copy)
@@ -168,8 +168,9 @@
         // No matches
         return null;
       }
+
       if (!(parents instanceof FixedBitSet)) {
-        throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
+        throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents + " filter=" + parentsFilter);
       }
 
       return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode);
Index: lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
===================================================================
--- lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java	(revision 1176852)
+++ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java	(working copy)
@@ -249,10 +249,11 @@
       AtomicReaderContext context = getLeafContextForField(field);
       final Spans spans;
+      Weight.ScorerContext scorerContext = Weight.ScorerContext.def().acceptOnlyDocs(context.reader.getLiveDocs());
       if (mustRewriteQuery) {
-        spans = queries.get(field).getSpans(context);
+        spans = queries.get(field).getSpans(context, scorerContext);
       } else {
-        spans = spanQuery.getSpans(context);
+        spans = spanQuery.getSpans(context, scorerContext);
       }
Index: lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java	(working copy)
@@ -192,6 +192,8 @@
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
+
+    // ignores deletions
     ConstantScoreQuery constantScore = new ConstantScoreQuery(filter);
     docs = searcher.search(constantScore, 1);
     assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
@@ -204,13 +206,15 @@
     searcher.close();
     searcher = newSearcher(reader, false);
 
+    // no hit because IS ANDs del docs:
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
 
+    // no hit because CSQ realizes wrapped filter includes
+    // non-live docs and ANDs the live docs on the fly:
     docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+    assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits);
 
-    // force cache to regenerate:
     filter = new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);
Index: lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java	(working copy)
@@ -24,6 +24,7 @@
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.ReaderUtil;
 
 /**
@@ -48,8 +49,8 @@
   public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
     AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
-    if(leaves.length == 1) {
-      return query.getSpans(leaves[0]);
+    if (leaves.length == 1) {
+      return query.getSpans(leaves[0], Weight.ScorerContext.def().acceptOnlyDocs(leaves[0].reader.getLiveDocs()));
     }
     return new MultiSpansWrapper(leaves, query);
   }
@@ -60,14 +61,14 @@
       return false;
     }
     if (current == null) {
-      current = query.getSpans(leaves[leafOrd]);
+      current = query.getSpans(leaves[leafOrd], Weight.ScorerContext.def().acceptOnlyDocs(leaves[leafOrd].reader.getLiveDocs()));
     }
     while(true) {
       if (current.next()) {
         return true;
       }
       if (++leafOrd < leaves.length) {
-        current = query.getSpans(leaves[leafOrd]);
+        current = query.getSpans(leaves[leafOrd], Weight.ScorerContext.def().acceptOnlyDocs(leaves[leafOrd].reader.getLiveDocs()));
       } else {
         current = null;
         break;
@@ -85,17 +86,17 @@
     int subIndex = ReaderUtil.subIndex(target, leaves);
     assert subIndex >= leafOrd;
     if (subIndex != leafOrd) {
-      current = query.getSpans(leaves[subIndex]);
+      // note: use the target leaf's live docs (subIndex), not the stale leafOrd's:
+      current = query.getSpans(leaves[subIndex], Weight.ScorerContext.def().acceptOnlyDocs(leaves[subIndex].reader.getLiveDocs()));
       leafOrd = subIndex;
     } else if (current == null) {
-      current = query.getSpans(leaves[leafOrd]);
+      current = query.getSpans(leaves[leafOrd], Weight.ScorerContext.def().acceptOnlyDocs(leaves[leafOrd].reader.getLiveDocs()));
     }
     while (true) {
       if (current.skipTo(target - leaves[leafOrd].docBase)) {
         return true;
       }
       if (++leafOrd < leaves.length) {
-        current = query.getSpans(leaves[leafOrd]);
+        current = query.getSpans(leaves[leafOrd], Weight.ScorerContext.def().acceptOnlyDocs(leaves[leafOrd].reader.getLiveDocs()));
       } else {
         current = null;
         break;
Index: lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java	(working copy)
@@ -82,7 +82,7 @@
     }
 
     @Override
-    public Spans getSpans(AtomicReaderContext context) throws IOException {
+    public Spans getSpans(AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
       throw new UnsupportedOperationException(UNSUPPORTED_MSG);
     }
Index: lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java	(working copy)
@@ -160,7 +160,7 @@
     return new FieldCacheRangeFilter.FieldCacheDocIdSet(context.reader, true) {
       @Override
-      boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException {
+      public boolean get(int doc) throws ArrayIndexOutOfBoundsException {
        return termSet.get(fcsi.getOrd(doc));
      }
    };
Index: lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/TestConstantScoreQuery.java	(working copy)
@@ -18,6 +18,7 @@
  */
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -130,5 +131,32 @@
       if (directory != null) directory.close();
     }
   }
+
+  public void testConstantScoreQueryAndFilter() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, d);
+    Document doc = new Document();
+    doc.add(newField("field", "a", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    doc = new Document();
+    doc.add(newField("field", "b", StringField.TYPE_UNSTORED));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+
+    Filter filterB = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "b"))), CachingWrapperFilter.DeletesMode.RECACHE);
+    Query query = new ConstantScoreQuery(filterB);
+
+    IndexSearcher s = new IndexSearcher(r);
+    assertEquals(1, s.search(query, filterB, 1).totalHits); // Query for field:b, Filter field:b
+
+    Filter filterA = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), CachingWrapperFilter.DeletesMode.RECACHE);
+    query = new ConstantScoreQuery(filterA);
+
+    assertEquals(0, s.search(query, filterB, 1).totalHits); // Query field:b, Filter field:a
+
+    r.close();
+    d.close();
+  }
 }
Index: lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
===================================================================
--- lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java	(revision 1176852)
+++ lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java	(working copy)
@@ -87,13 +87,15 @@
     searcher.close();
     searcher = newSearcher(reader, false);
 
+    // no hit because IS ANDs del docs:
     docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
     assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
 
+    // no hit because CSQ realizes wrapped filter includes
+    // non-live docs and ANDs the live docs on the fly:
     docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
+    assertEquals("[just filter] Should not find a hit...", 0, docs.totalHits);
 
-    // force cache to regenerate:
     filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);
Index: lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java	(working copy)
@@ -126,15 +126,25 @@
     }
 
     @Override
-    public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
+    public Scorer scorer(AtomicReaderContext context, final ScorerContext scorerContext) throws IOException {
       final DocIdSetIterator disi;
       if (filter != null) {
         assert query == null;
         final DocIdSet dis = filter.getDocIdSet(context);
         if (dis == null) {
           return null;
+        } else if (scorerContext.acceptOnlyDocs != null) {
+          // Must dynamically AND in the incoming
+          // acceptOnlyDocs since we have no way to ask
+          // filter to do so:
+          disi = new FilteredDocIdSetIterator(dis.iterator()) {
+            protected boolean match(int doc) throws IOException {
+              return scorerContext.acceptOnlyDocs.get(doc);
+            }
+          };
+        } else {
+          disi = dis.iterator();
         }
-        disi = dis.iterator();
       } else {
         assert query != null && innerWeight != null;
         disi = innerWeight.scorer(context, scorerContext);
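Note on the ConstantScoreQuery hunk above: when the wrapped Filter cannot apply acceptOnlyDocs itself, the scorer folds them in on the fly via FilteredDocIdSetIterator. A minimal standalone sketch of the same pattern (the helper name withAcceptBits is illustrative, not part of the patch):

    // Sketch: keep only docs that the acceptance Bits allow, mirroring the
    // anonymous FilteredDocIdSetIterator subclass in the hunk above.
    static DocIdSetIterator withAcceptBits(DocIdSetIterator it, final Bits bits) {
      return new FilteredDocIdSetIterator(it) {
        @Override
        protected boolean match(int doc) {
          return bits.get(doc); // dynamically AND in the acceptance bits
        }
      };
    }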
Index: lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java	(working copy)
@@ -46,6 +46,8 @@
 
   protected final Q query;
 
+  private double randomAccessDocsPercent = 1.0;
+
   /**
    * Wrap a {@link MultiTermQuery} as a Filter.
   */
@@ -123,8 +125,10 @@
     assert termsEnum != null;
     if (termsEnum.next() != null) {
       // fill into a FixedBitSet
-      final FixedBitSet bitSet = new FixedBitSet(context.reader.maxDoc());
+      final int maxDoc = context.reader.maxDoc();
+      final FixedBitSet bitSet = new FixedBitSet(maxDoc);
       int termCount = 0;
+      int totalCount = 0;
       final Bits liveDocs = reader.getLiveDocs();
       DocsEnum docsEnum = null;
       do {
@@ -135,6 +139,7 @@
         final DocsEnum.BulkReadResult result = docsEnum.getBulkResult();
         while (true) {
           final int count = docsEnum.read();
+          totalCount += count;
           if (count != 0) {
             final int[] docs = result.docs.ints;
             for (int i = 0; i < count; i++) {
@@ -145,12 +150,25 @@
           }
         }
       } while (termsEnum.next() != null);
-      // System.out.println("  done termCount=" + termCount);
+      // System.out.println("  done termCount=" +
+      // termCount);
+
+      if (maxDoc != 0 && (totalCount / (double) maxDoc) >= randomAccessDocsPercent) {
+        bitSet.setSupportRandomAccess(true);
+      }
 
       query.incTotalNumberOfTerms(termCount);
       return bitSet;
     } else {
       return DocIdSet.EMPTY_DOCIDSET;
     }
   }
+
+  /** Set the threshold, as a fraction of the total number of
+   *  documents in the reader, for enabling random-access application
+   *  of the returned DocIdSet.  Higher values mean more docs must
+   *  match before the filter cuts over to random-access.
+   *  Default is 1.0 (100% of maxDoc). */
+  public void setRandomAccessDocsPercent(double v) {
+    this.randomAccessDocsPercent = v;
+  }
 }
Index: lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/MatchAllDocsQuery.java	(working copy)
@@ -38,9 +38,9 @@
     private final int maxDoc;
     private final Bits liveDocs;
 
-    MatchAllScorer(IndexReader reader, Weight w, float score) throws IOException {
+    MatchAllScorer(IndexReader reader, Bits acceptOnlyDocs, Weight w, float score) throws IOException {
       super(w);
-      liveDocs = reader.getLiveDocs();
+      liveDocs = acceptOnlyDocs;
       this.score = score;
       maxDoc = reader.maxDoc();
     }
@@ -105,7 +105,7 @@
 
     @Override
     public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
-      return new MatchAllScorer(context.reader, this, queryWeight);
+      return new MatchAllScorer(context.reader, scorerContext.acceptOnlyDocs, this, queryWeight);
     }
 
     @Override
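Usage note for setRandomAccessDocsPercent above: the cutover test compares totalCount / maxDoc (a fraction, where totalCount counts postings across all matching terms) against the configured value, so the 1.0 default only cuts over once the match count reaches maxDoc. A caller wanting random-access for denser filters might lower it; a sketch, with a made-up field/prefix and assuming the wrapper's constructor is reachable from the call site:

    // Sketch: wrap a prefix query as a filter and lower the random-access
    // threshold to 5% of maxDoc (field and prefix are hypothetical).
    MultiTermQueryWrapperFilter<PrefixQuery> filter =
        new MultiTermQueryWrapperFilter<PrefixQuery>(new PrefixQuery(new Term("body", "luc")));
    filter.setRandomAccessDocsPercent(0.05);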
Index: lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java	(working copy)
@@ -16,12 +16,12 @@
  */
 
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.Bits;
 
-import java.io.IOException;
-
 /**
  * Wraps another SpanFilter's result and caches it.  The purpose is to allow
  * filters to simply filter, and then wrap with this class to add caching.
@@ -83,6 +83,7 @@
 
     missCount++;
     result = filter.bitSpans(context);
+    result.getDocIdSet().setLiveDocsOnly(cache.deletesMode == CachingWrapperFilter.DeletesMode.RECACHE);
 
     cache.put(coreKey, delCoreKey, result);
     return result;
Index: lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java	(working copy)
@@ -126,7 +126,7 @@
         // because deleted docs have an order of 0 (null entry in StringIndex)
         return new FieldCacheDocIdSet(context.reader, true) {
           @Override
-          final boolean matchDoc(int doc) {
+          public final boolean get(int doc) {
             final int docOrd = fcsi.getOrd(doc);
             return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
           }
@@ -178,7 +178,7 @@
       // we only respect deleted docs if the range contains 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -229,7 +229,7 @@
       // ignore deleted docs if range doesn't contain 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -280,7 +280,7 @@
       // ignore deleted docs if range doesn't contain 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -331,7 +331,7 @@
       // ignore deleted docs if range doesn't contain 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0L && inclusiveUpperPoint >= 0L)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -386,7 +386,7 @@
       // ignore deleted docs if range doesn't contain 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0f && inclusiveUpperPoint >= 0.0f)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -441,7 +441,7 @@
       // ignore deleted docs if range doesn't contain 0
       return new FieldCacheDocIdSet(context.reader, !(inclusiveLowerPoint <= 0.0 && inclusiveUpperPoint >= 0.0)) {
         @Override
-        boolean matchDoc(int doc) {
+        public boolean get(int doc) {
          return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
        }
      };
@@ -505,20 +505,24 @@
   /** Returns the current numeric parser ({@code null} for {@code T} is {@code String}} */
   public FieldCache.Parser getParser() { return parser; }
 
-  static abstract class FieldCacheDocIdSet extends DocIdSet {
+  static abstract class FieldCacheDocIdSet extends DocIdSet implements Bits {
     private final IndexReader reader;
     private final boolean canIgnoreDeletedDocs;
 
     FieldCacheDocIdSet(IndexReader reader, boolean canIgnoreDeletedDocs) {
       this.reader = reader;
       this.canIgnoreDeletedDocs = canIgnoreDeletedDocs;
+
+      // Always use random-access to apply FieldCacheRangeFilter:
+      setSupportRandomAccess(true);
     }
 
     /**
      * this method checks, if a doc is a hit, should throw AIOBE, when position
      * invalid
      */
-    abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
+    @Override
+    public abstract boolean get(int docID) throws ArrayIndexOutOfBoundsException;
 
     /**
      * this DocIdSet is always cacheable (does not go back
@@ -530,6 +534,21 @@
     }
 
     @Override
+    public Bits getRandomAccessBits() {
+      return this;
+    }
+
+    @Override
+    public boolean isLiveDocsOnly() {
+      return canIgnoreDeletedDocs;
+    }
+
+    @Override
+    public int length() {
+      return reader.maxDoc();
+    }
+
+    @Override
     public DocIdSetIterator iterator() throws IOException {
 
       final Bits liveDocs = canIgnoreDeletedDocs ? null : reader.getLiveDocs();
@@ -548,7 +567,7 @@
           try {
             do {
               doc++;
-            } while (!matchDoc(doc));
+            } while (!get(doc));
             return doc;
           } catch (ArrayIndexOutOfBoundsException e) {
             return doc = NO_MORE_DOCS;
@@ -559,7 +578,7 @@
           public int advance(int target) {
             try {
               doc = target;
-              while (!matchDoc(doc)) {
+              while (!get(doc)) {
                 doc++;
               }
               return doc;
@@ -589,14 +608,14 @@
             if (doc >= maxDoc) {
               return doc = NO_MORE_DOCS;
             }
-          } while (!liveDocs.get(doc) || !matchDoc(doc));
+          } while (!liveDocs.get(doc) || !get(doc));
           return doc;
         }
 
         @Override
         public int advance(int target) {
           for(doc=target; doc<maxDoc; doc++) {
-            if (liveDocs.get(doc) && matchDoc(doc)) {
+            if (liveDocs.get(doc) && get(doc)) {
               return doc;
             }
           }
           return doc = NO_MORE_DOCS;
         }

[A complete file section was lost here during extraction; only the context line below survived:]
             Collection<byte[]> payload = spans.getPayload();

Index: lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java	(working copy)
@@ -61,7 +61,7 @@
   public SpanFilterResult bitSpans(AtomicReaderContext context) throws IOException {
 
     final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
-    Spans spans = query.getSpans(context);
+    Spans spans = query.getSpans(context, Weight.ScorerContext.def().acceptOnlyDocs(context.reader.getLiveDocs()));
     List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
     int currentDoc = -1;
     SpanFilterResult.PositionInfo currentInfo = null;
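The FieldCacheDocIdSet change above is the template for any DocIdSet that wants random-access application: extend DocIdSet, implement Bits, return this from getRandomAccessBits(), and call setSupportRandomAccess(true). A self-contained toy example under those assumptions (EvenDocs is hypothetical):

    // Sketch: a DocIdSet matching only even docIDs, applied random-access.
    class EvenDocs extends DocIdSet implements Bits {
      private final int maxDoc;
      EvenDocs(int maxDoc) {
        this.maxDoc = maxDoc;
        setSupportRandomAccess(true);  // ask consumers to call get(doc) directly
      }
      @Override public boolean get(int doc) { return (doc & 1) == 0; }
      @Override public int length() { return maxDoc; }
      @Override public Bits getRandomAccessBits() { return this; }
      @Override public DocIdSetIterator iterator() {
        return new DocIdSetIterator() {  // fallback for iterating consumers
          private int doc = -1;
          @Override public int docID() { return doc; }
          @Override public int nextDoc() { return advance(doc + 1); }
          @Override public int advance(int target) {
            doc = target + (target & 1); // round up to the next even doc
            return doc < maxDoc ? doc : (doc = NO_MORE_DOCS);
          }
        };
      }
    }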
Index: lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java	(working copy)
@@ -17,18 +17,19 @@
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.ArrayUtil;
-
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Collection;
 import java.util.Set;
 
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.ArrayUtil;
+
 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
  * where the subspans do not overlap and have a maximum slop between them.
  *
@@ -77,11 +78,11 @@
   private SpanNearQuery query;
   private boolean collectPayloads = true;
 
-  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context) throws IOException {
-    this(spanNearQuery, context, true);
+  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
+    this(spanNearQuery, context, scorerContext, true);
   }
 
-  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, boolean collectPayloads)
+  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Weight.ScorerContext scorerContext, boolean collectPayloads)
   throws IOException {
     if (spanNearQuery.getClauses().length < 2) {
       throw new IllegalArgumentException("Less than 2 clauses: "
@@ -94,7 +95,7 @@
     matchPayload = new LinkedList<byte[]>();
     subSpansByDoc = new Spans[clauses.length];
     for (int i = 0; i < clauses.length; i++) {
-      subSpans[i] = clauses[i].getSpans(context);
+      subSpans[i] = clauses[i].getSpans(context, scorerContext);
       subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
     }
     query = spanNearQuery; // kept for toString() only.
Index: lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java	(working copy)
@@ -17,16 +17,17 @@
  * limitations under the License.
 */
 
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.PriorityQueue;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-import java.util.HashSet;
 
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.PriorityQueue;
+
 /**
  * Similar to {@link NearSpansOrdered}, but for the unordered case.
 *
@@ -131,7 +132,7 @@
   }
 
-  public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context)
+  public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Weight.ScorerContext scorerContext)
     throws IOException {
     this.query = query;
     this.slop = query.getSlop();
@@ -141,7 +142,7 @@
     subSpans = new Spans[clauses.length];
     for (int i = 0; i < clauses.length; i++) {
       SpansCell cell =
-        new SpansCell(clauses[i].getSpans(context), i);
+        new SpansCell(clauses[i].getSpans(context, scorerContext), i);
       ordered.add(cell);
       subSpans[i] = cell.spans;
     }
Index: lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java	(working copy)
@@ -92,8 +92,8 @@
   // ...this is done to be more consistent with things like SpanFirstQuery
 
   @Override
-  public Spans getSpans(AtomicReaderContext context) throws IOException {
-    return maskedQuery.getSpans(context);
+  public Spans getSpans(AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
+    return maskedQuery.getSpans(context, scorerContext);
   }
 
   @Override
Index: lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java	(working copy)
@@ -21,6 +21,7 @@
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
@@ -71,76 +72,76 @@
   public Object clone() {
     SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
     spanNotQuery.setBoost(getBoost());
-    return  spanNotQuery;
+    return spanNotQuery;
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, final Weight.ScorerContext scorerContext) throws IOException {
     return new Spans() {
-        private Spans includeSpans = include.getSpans(context);
-        private boolean moreInclude = true;
+      private Spans includeSpans = include.getSpans(context, scorerContext);
+      private boolean moreInclude = true;
 
-        private Spans excludeSpans = exclude.getSpans(context);
-        private boolean moreExclude = excludeSpans.next();
+      private Spans excludeSpans = exclude.getSpans(context, scorerContext);
+      private boolean moreExclude = excludeSpans.next();
 
-        @Override
-        public boolean next() throws IOException {
-          if (moreInclude)                        // move to next include
-            moreInclude = includeSpans.next();
+      @Override
+      public boolean next() throws IOException {
+        if (moreInclude)                        // move to next include
+          moreInclude = includeSpans.next();
 
-          while (moreInclude && moreExclude) {
+        while (moreInclude && moreExclude) {
 
-            if (includeSpans.doc() > excludeSpans.doc())             // skip exclude
-              moreExclude = excludeSpans.skipTo(includeSpans.doc());
+          if (includeSpans.doc() > excludeSpans.doc())             // skip exclude
+            moreExclude = excludeSpans.skipTo(includeSpans.doc());
 
-            while (moreExclude                    // while exclude is before
-                   && includeSpans.doc() == excludeSpans.doc()
-                   && excludeSpans.end() <= includeSpans.start()) {
-              moreExclude = excludeSpans.next();  // increment exclude
-            }
+          while (moreExclude                    // while exclude is before
+                 && includeSpans.doc() == excludeSpans.doc()
+                 && excludeSpans.end() <= includeSpans.start()) {
+            moreExclude = excludeSpans.next();  // increment exclude
+          }
 
-            if (!moreExclude                      // if no intersection
-                || includeSpans.doc() != excludeSpans.doc()
-                || includeSpans.end() <= excludeSpans.start())
-              break;                              // we found a match
+          if (!moreExclude                      // if no intersection
+              || includeSpans.doc() != excludeSpans.doc()
+              || includeSpans.end() <= excludeSpans.start())
+            break;                              // we found a match
 
-            moreInclude = includeSpans.next();    // intersected: keep scanning
-          }
-          return moreInclude;
+          moreInclude = includeSpans.next();    // intersected: keep scanning
         }
+        return moreInclude;
+      }
 
-        @Override
-        public boolean skipTo(int target) throws IOException {
-          if (moreInclude)                        // skip include
-            moreInclude = includeSpans.skipTo(target);
+      @Override
+      public boolean skipTo(int target) throws IOException {
+        if (moreInclude)                        // skip include
+          moreInclude = includeSpans.skipTo(target);
 
-          if (!moreInclude)
-            return false;
+        if (!moreInclude)
+          return false;
 
-          if (moreExclude                        // skip exclude
-              && includeSpans.doc() > excludeSpans.doc())
-            moreExclude = excludeSpans.skipTo(includeSpans.doc());
+        if (moreExclude                        // skip exclude
+            && includeSpans.doc() > excludeSpans.doc())
+          moreExclude = excludeSpans.skipTo(includeSpans.doc());
 
-          while (moreExclude                     // while exclude is before
-                 && includeSpans.doc() == excludeSpans.doc()
-                 && excludeSpans.end() <= includeSpans.start()) {
-            moreExclude = excludeSpans.next();   // increment exclude
-          }
+        while (moreExclude                     // while exclude is before
+               && includeSpans.doc() == excludeSpans.doc()
+               && excludeSpans.end() <= includeSpans.start()) {
+          moreExclude = excludeSpans.next();   // increment exclude
+        }
 
-          if (!moreExclude                       // if no intersection
-              || includeSpans.doc() != excludeSpans.doc()
-              || includeSpans.end() <= excludeSpans.start())
-            return true;                         // we found a match
+        if (!moreExclude                       // if no intersection
+            || includeSpans.doc() != excludeSpans.doc()
+            || includeSpans.end() <= excludeSpans.start())
+          return true;                         // we found a match
 
-          return next();                         // scan to next match
-        }
+        return next();                         // scan to next match
+      }
 
-        @Override
-        public int doc() { return includeSpans.doc(); }
-        @Override
-        public int start() { return includeSpans.start(); }
-        @Override
-        public int end() { return includeSpans.end(); }
+      @Override
+      public int doc() { return includeSpans.doc(); }
+      @Override
+      public int start() { return includeSpans.start(); }
+      @Override
+      public int end() { return includeSpans.end(); }
 
       // TODO: Remove warning after API has been finalized
       @Override
Index: lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java	(working copy)
@@ -68,7 +68,7 @@
 
   @Override
   public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
-    return new SpanScorer(query.getSpans(context), this, similarity.sloppyDocScorer(stats, query.getField(), context));
+    return new SpanScorer(query.getSpans(context, scorerContext), this, similarity.sloppyDocScorer(stats, query.getField(), context));
   }
 
   @Override
Index: lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java	(working copy)
@@ -17,17 +17,18 @@
  */
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Query;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Set;
 
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Weight;
+
 /**
  *
 *
@@ -81,8 +82,8 @@
   protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context) throws IOException {
-    return new PositionCheckSpan(context);
+  public Spans getSpans(final AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
+    return new PositionCheckSpan(context, scorerContext);
   }
 
 
@@ -106,8 +107,8 @@
   protected class PositionCheckSpan extends Spans {
     private Spans spans;
 
-    public PositionCheckSpan(AtomicReaderContext context) throws IOException {
-      spans = match.getSpans(context);
+    public PositionCheckSpan(AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
+      spans = match.getSpans(context, scorerContext);
     }
 
     @Override
@@ -173,4 +174,4 @@
     }
 
   }
-}
\ No newline at end of file
+}
Index: lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java	(working copy)
@@ -21,6 +21,7 @@
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
@@ -81,9 +82,9 @@
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, final Weight.ScorerContext scorerContext) throws IOException {
     final IndexReader reader = context.reader;
-    final DocsAndPositionsEnum postings = reader.termPositionsEnum(reader.getLiveDocs(),
+    final DocsAndPositionsEnum postings = reader.termPositionsEnum(scorerContext.acceptOnlyDocs,
                                                                    term.field(),
                                                                    term.bytes());
Index: lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java	(working copy)
@@ -28,7 +28,7 @@
 public abstract class SpanQuery extends Query {
   /** Expert: Returns the matches for this query in an index.  Used internally
    * to search for spans. */
-  public abstract Spans getSpans(AtomicReaderContext context) throws IOException;
+  public abstract Spans getSpans(AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException;
 
   /** Returns the name of the field matched by this query.*/
   public abstract String getField();
Index: lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java	(working copy)
@@ -18,19 +18,19 @@
  */
 
 import java.io.IOException;
-
-import java.util.List;
+import java.util.ArrayList;
 import java.util.Collection;
-import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;
 
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.search.Query;
 
 /** Matches the union of its clauses.*/
 public class SpanOrQuery extends SpanQuery implements Cloneable {
@@ -163,9 +163,9 @@
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, final Weight.ScorerContext scorerContext) throws IOException {
     if (clauses.size() == 1)                      // optimize 1-clause case
-      return (clauses.get(0)).getSpans(context);
+      return (clauses.get(0)).getSpans(context, scorerContext);
 
     return new Spans() {
         private SpanQueue queue = null;
@@ -174,8 +174,8 @@
           queue = new SpanQueue(clauses.size());
           Iterator<SpanQuery> i = clauses.iterator();
           while (i.hasNext()) {
-            Spans spans = i.next().getSpans(context);
-            if ( ((target == -1) && spans.next())
+            Spans spans = i.next().getSpans(context, scorerContext);
+            if (((target == -1) && spans.next())
                 || ((target != -1) && spans.skipTo(target))) {
               queue.add(spans);
             }
Index: lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java	(working copy)
@@ -19,14 +19,15 @@
 
 import java.io.IOException;
 
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoringRewrite;
 import org.apache.lucene.search.TopTermsRewrite;
-import org.apache.lucene.search.ScoringRewrite;
-import org.apache.lucene.search.BooleanClause.Occur; // javadocs only
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.TermContext;
 
 /**
@@ -89,7 +90,7 @@
   }
 
   @Override
-  public Spans getSpans(AtomicReaderContext context) throws IOException {
+  public Spans getSpans(AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
     throw new UnsupportedOperationException("Query should have been rewritten");
   }
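All the SpanQuery subclasses above follow one mechanical migration, and so do their callers. For reference, a call site that previously had q.getSpans(ctx) becomes (ctx being whatever AtomicReaderContext is already in scope):

    // Old: Spans spans = q.getSpans(ctx);
    // New: thread the leaf's live docs through ScorerContext:
    Spans spans = q.getSpans(ctx,
        Weight.ScorerContext.def().acceptOnlyDocs(ctx.reader.getLiveDocs()));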
Index: lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java	(working copy)
@@ -18,18 +18,16 @@
  */
 
 import java.io.IOException;
-
-
-import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;
-
-import org.apache.lucene.index.IndexReader;
+
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.ToStringUtils;
 
 /** Matches spans which are near one another.  One can specify slop, the
@@ -117,16 +115,16 @@
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, Weight.ScorerContext scorerContext) throws IOException {
     if (clauses.size() == 0)                      // optimize 0-clause case
-      return new SpanOrQuery(getClauses()).getSpans(context);
+      return new SpanOrQuery(getClauses()).getSpans(context, scorerContext);
 
     if (clauses.size() == 1)                      // optimize 1-clause case
-      return clauses.get(0).getSpans(context);
+      return clauses.get(0).getSpans(context, scorerContext);
 
     return inOrder
-            ? (Spans) new NearSpansOrdered(this, context, collectPayloads)
-            : (Spans) new NearSpansUnordered(this, context);
+            ? (Spans) new NearSpansOrdered(this, context, scorerContext, collectPayloads)
+            : (Spans) new NearSpansUnordered(this, context, scorerContext);
   }
 
   @Override
Index: lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java	(working copy)
@@ -167,7 +167,8 @@
     public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
       assert !termArrays.isEmpty();
       final IndexReader reader = context.reader;
-      final Bits liveDocs = reader.getLiveDocs();
+
+      final Bits liveDocs = scorerContext.acceptOnlyDocs;
 
       PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
@@ -178,7 +179,7 @@
         int docFreq;
 
         if (terms.length > 1) {
-          postingsEnum = new UnionDocsAndPositionsEnum(reader, terms);
+          postingsEnum = new UnionDocsAndPositionsEnum(scorerContext.acceptOnlyDocs, reader, terms);
 
           // coarse -- this overcounts since a given doc can
           // have more than one terms:
@@ -188,7 +189,7 @@
           }
         } else {
           final Term term = terms[0];
-          postingsEnum = reader.termPositionsEnum(liveDocs,
+          postingsEnum = reader.termPositionsEnum(scorerContext.acceptOnlyDocs,
                                                   term.field(),
                                                   term.bytes());
@@ -434,17 +435,17 @@
   private DocsQueue _queue;
   private IntQueue _posList;
 
-  public UnionDocsAndPositionsEnum(IndexReader indexReader, Term[] terms) throws IOException {
+  public UnionDocsAndPositionsEnum(Bits acceptOnlyDocs, IndexReader indexReader, Term[] terms) throws IOException {
     List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
-    final Bits liveDocs = indexReader.getLiveDocs();
+
     for (int i = 0; i < terms.length; i++) {
-      DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs,
+      DocsAndPositionsEnum postings = indexReader.termPositionsEnum(acceptOnlyDocs,
                                                                     terms[i].field(),
                                                                     terms[i].bytes());
       if (postings != null) {
         docsEnums.add(postings);
       } else {
-        if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
+        if (indexReader.termDocsEnum(acceptOnlyDocs, terms[i].field(), terms[i].bytes()) != null) {
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
         }
       }
Index: lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/CachingWrapperFilter.java	(working copy)
@@ -67,6 +67,8 @@
 
   protected final FilterCache<DocIdSet> cache;
 
+  private double randomAccessDocsPercent = 1.0;
+
   static abstract class FilterCache<T> {
 
     /**
@@ -76,7 +78,7 @@
     // after de-serialize
     transient Map<Object,Map<Object,T>> cache;
 
-    private final DeletesMode deletesMode;
+    final DeletesMode deletesMode;
 
     public FilterCache(DeletesMode deletesMode) {
       this.deletesMode = deletesMode;
@@ -216,6 +218,18 @@
       // cache miss
       docIdSet = docIdSetToCache(filter.getDocIdSet(context), reader);
 
+      if (docIdSet instanceof FixedBitSet) {
+        final FixedBitSet bitSet = (FixedBitSet) docIdSet;
+        // One-time cost on cache miss, which should be worth
+        // it since it's amortized on future cache hits:
+        final int setCount = bitSet.cardinality();
+
+        if ((setCount / (double) reader.maxDoc()) >= randomAccessDocsPercent) {
+          bitSet.setSupportRandomAccess(true);
+          bitSet.setLiveDocsOnly(cache.deletesMode != DeletesMode.IGNORE);
+        }
+      }
+
       if (docIdSet != null) {
         cache.put(coreKey, delCoreKey, docIdSet);
       }
@@ -238,4 +252,13 @@
   public int hashCode() {
     return filter.hashCode() ^ 0x1117BF25;
   }
+
+  /** Set the threshold, as a fraction of the total number of
+   *  documents in the reader, for enabling random-access application
+   *  of the returned DocIdSet.  Higher values mean more docs must
+   *  match before the filter cuts over to random-access.
+   *  Default is 1.0 (100% of maxDoc). */
+  public void setRandomAccessDocsPercent(double v) {
+    this.randomAccessDocsPercent = v;
+  }
 }
Index: lucene/src/java/org/apache/lucene/search/BooleanQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/BooleanQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/BooleanQuery.java	(working copy)
@@ -310,9 +310,10 @@
       List<Scorer> prohibited = new ArrayList<Scorer>();
       List<Scorer> optional = new ArrayList<Scorer>();
       Iterator<BooleanClause> cIter = clauses.iterator();
+      final ScorerContext subScorerContext = ScorerContext.def().acceptOnlyDocs(scorerContext.acceptOnlyDocs);
       for (Weight w  : weights) {
         BooleanClause c =  cIter.next();
-        Scorer subScorer = w.scorer(context, ScorerContext.def());
+        Scorer subScorer = w.scorer(context, subScorerContext);
         if (subScorer == null) {
           if (c.isRequired()) {
             return null;
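Usage note for the CachingWrapperFilter changes above: the cardinality() pass runs once per cache miss, and the cached FixedBitSet is flagged live-docs-only unless deletions are ignored. Putting the two new knobs together (startFilter stands in for any Filter):

    // Sketch: cache a filter, fold deletions in on re-cache, and allow
    // random-access application once 5% of maxDoc is set.
    CachingWrapperFilter cached =
        new CachingWrapperFilter(startFilter, CachingWrapperFilter.DeletesMode.RECACHE);
    cached.setRandomAccessDocsPercent(0.05);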
Index: lucene/src/java/org/apache/lucene/search/PhraseQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/PhraseQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/PhraseQuery.java	(working copy)
@@ -35,7 +35,6 @@
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
 
 /** A Query that matches documents containing a particular sequence of terms.
 * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
@@ -215,7 +214,6 @@
     public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
       assert !terms.isEmpty();
       final IndexReader reader = context.reader;
-      final Bits liveDocs = reader.getLiveDocs();
       PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
 
       for (int i = 0; i < terms.size(); i++) {
         final Term t = terms.get(i);
@@ -224,14 +222,14 @@
           assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader";
           return null;
         }
-        DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs,
+        DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(scorerContext.acceptOnlyDocs,
                                                                      t.field(),
                                                                      t.bytes(),
                                                                      state);
         // PhraseQuery on a field that did not index
         // positions.
         if (postingsEnum == null) {
-          assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
+          assert (reader.termDocsEnum(scorerContext.acceptOnlyDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
         }
Index: lucene/src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/TermQuery.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/TermQuery.java	(working copy)
@@ -74,24 +74,26 @@
 
     @Override
     public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
+      final String field = term.field();
+      final IndexReader reader = context.reader;
       assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
-      final TermsEnum termsEnum = getTermsEnum(context);
-      if (termsEnum == null) {
+      final TermState state = termStates.get(context.ord);
+      if (state == null) { // term is not present in that reader
+        assert termNotInReader(reader, field, term.bytes()) : "no termstate found but term exists in reader";
         return null;
       }
-      // TODO should we reuse the DocsEnum here? 
-      final DocsEnum docs = termsEnum.docs(context.reader.getLiveDocs(), null);
+      final DocsEnum docs = reader.termDocsEnum(scorerContext.acceptOnlyDocs, field, term.bytes(), state);
       assert docs != null;
       return new TermScorer(this, docs, createDocScorer(context));
     }
-    
+
     /**
      * Creates an {@link ExactDocScorer} for this {@link TermWeight}*/
     ExactDocScorer createDocScorer(AtomicReaderContext context)
         throws IOException {
       return similarity.exactDocScorer(stats, term.field(), context);
     }
-    
+
    /**
     * Returns a {@link TermsEnum} positioned at this weights Term or null if
     * the term does not exist in the given context
@@ -110,7 +112,6 @@
 
     private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
       // only called from assert
-      //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
       final Terms terms = reader.terms(field);
       return terms == null || terms.docFreq(bytes) == 0;
     }
Index: lucene/src/java/org/apache/lucene/search/DocIdSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/DocIdSet.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/DocIdSet.java	(working copy)
@@ -19,12 +19,23 @@
 
 import java.io.IOException;
 
+import org.apache.lucene.util.Bits;
+
 /**
  * A DocIdSet contains a set of doc ids. Implementing classes must
  * only implement {@link #iterator} to provide access to the set.
  */
 public abstract class DocIdSet {
 
+  // If true, this instance implements the Bits interface
+  // and the filter should be applied random-access instead
+  // of by iteration:
+  protected boolean supportRandomAccess;
+
+  // If true, this filter includes only live documents (ie,
+  // deleted documents have already been filtered out):
+  protected boolean liveDocsOnly;
+
   /** An empty {@code DocIdSet} instance for easy use, e.g. in Filters that hit
    * no documents. */
   public static final DocIdSet EMPTY_DOCIDSET = new DocIdSet() {
@@ -46,8 +57,13 @@
     public boolean isCacheable() {
       return true;
     }
+
+    @Override
+    public Bits getRandomAccessBits() {
+      return null;
+    }
   };
-  
+
   /** Provides a {@link DocIdSetIterator} to access the set.
    * This implementation can return <code>null</code> or
    * <code>{@linkplain #EMPTY_DOCIDSET}.iterator()</code> if there
@@ -64,4 +80,58 @@
   public boolean isCacheable() {
     return false;
   }
+
+  /**
+   * Allows this DocIdSet to be marked as containing only live documents
+   * (documents that have not been deleted).
+   *
+   * @param liveDocsOnly Whether this DocIdSet contains only live documents
+   */
+  public void setLiveDocsOnly(boolean liveDocsOnly) {
+    this.liveDocsOnly = liveDocsOnly;
+  }
+
+  /**
+   * Used if {@link #getRandomAccessBits()} returns non-null, meaning this DocIdSet
+   * will be accessed randomly.
+   *
+   * @return {@code true} if this DocIdSet contains only live documents (documents
+   *         that have not been deleted).
+   */
+  public boolean isLiveDocsOnly() {
+    return liveDocsOnly;
+  }
+
+  /**
+   * Return a Bits impl if this DocIdSet should be applied via random-access
+   * (because the underlying bit set implementation supports random access,
+   * and the filter is dense enough), instead of {@link DocIdSetIterator}.
+   *
+   * @return Bits representation of this DocIdSet that can be used for random-access
+   *         or {@code null} if the DocIdSet does not support random-access
+   */
+  public Bits getRandomAccessBits() {
+    return null;
+  }
+
+  /**
+   * Controls whether this DocIdSet should support random-access or not (even
+   * if it were possible to convert to a Bits representation).
+   *
+   * @param supportRandomAccess Whether this DocIdSet should support random-access
+   *        through {@link #getRandomAccessBits()}
+   */
+  public void setSupportRandomAccess(boolean supportRandomAccess) {
+    this.supportRandomAccess = supportRandomAccess;
+  }
+
+  /**
+   * Whether this DocIdSet supports random-access (even if {@link #getRandomAccessBits()}
+   * could return a non-null value).
+   *
+   * @return {@code true} if this DocIdSet supports random-access
+   */
+  public boolean supportsRandomAccess() {
+    return supportRandomAccess;
+  }
 }
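The new DocIdSet flags are consumer-driven; IndexSearcher.searchWithFilter below is the canonical consumer. Its decision logic, reduced to a sketch (not the patch's exact code):

    // Sketch: choose between random-access and iteration for a filter.
    void applyFilter(DocIdSet docIdSet) throws IOException {
      Bits bits = docIdSet.supportsRandomAccess() ? docIdSet.getRandomAccessBits() : null;
      if (bits != null) {
        // Dense case: push the bits down to the scorer via
        // ScorerContext.acceptOnlyDocs, where they are applied
        // exactly like deleted docs.
      } else {
        // Sparse case: leapfrog the filter's iterator against the scorer.
        DocIdSetIterator it = docIdSet.iterator();
        // ... advance scorer and 'it' in lock-step, collecting matches ...
      }
    }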
Index: lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java	(working copy)
@@ -56,7 +56,7 @@
     return new DocIdSet() {
       @Override
       public DocIdSetIterator iterator() throws IOException {
-        return weight.scorer(privateContext, ScorerContext.def());
+        return weight.scorer(privateContext, ScorerContext.def().acceptOnlyDocs(context.reader.getLiveDocs()));
       }
       @Override
       public boolean isCacheable() { return false; }
Index: lucene/src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- lucene/src/java/org/apache/lucene/search/IndexSearcher.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/search/IndexSearcher.java	(working copy)
@@ -42,6 +42,8 @@
 import org.apache.lucene.search.similarities.SimilarityProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NIOFSDirectory;    // javadoc
+import org.apache.lucene.util.AndBits;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.ThreadInterruptedException;
 
@@ -276,7 +278,7 @@
   }
 
   /** Finds the top <code>n</code>
-   * hits for <code>query</code> where all results are after a previous 
+   * hits for <code>query</code> where all results are after a previous
    * result (<code>after</code>).
   * <p>
   * By passing the bottom result from a previous page as <code>after</code>,
@@ -288,7 +290,7 @@
   public TopDocs searchAfter(ScoreDoc after, Query query, int n) throws IOException {
     return searchAfter(after, query, null, n);
   }
-  
+
   /** Finds the top <code>n</code>
    * hits for <code>query</code>, applying <code>filter</code> if non-null,
    * where all results are after a previous result (<code>after</code>).
@@ -302,7 +304,7 @@
   public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n) throws IOException {
     return search(createNormalizedWeight(query), filter, after, n);
   }
-  
+
   /** Finds the top <code>n</code>
    * hits for <code>query</code>.
   *
@@ -569,13 +571,14 @@
 
     // TODO: should we make this
     // threaded...?  the Collector could be sync'd?
-    ScorerContext scorerContext = ScorerContext.def().scoreDocsInOrder(true).topScorer(true);
     // always use single thread:
     if (filter == null) {
+      ScorerContext scorerContext = ScorerContext.def().scoreDocsInOrder(true).topScorer(true);
      for (int i = 0; i < leaves.length; i++) { // search each subreader
        collector.setNextReader(leaves[i]);
-        scorerContext = scorerContext.scoreDocsInOrder(!collector.acceptsDocsOutOfOrder());
-        Scorer scorer = weight.scorer(leaves[i], scorerContext);
+        Scorer scorer = weight.scorer(leaves[i],
+                                      scorerContext.scoreDocsInOrder(!collector.acceptsDocsOutOfOrder())
+                                      .acceptOnlyDocs(leaves[i].reader.getLiveDocs()));
        if (scorer != null) {
          scorer.score(collector);
        }
@@ -592,44 +595,75 @@
                                final Filter filter,
                                final Collector collector) throws IOException {
     assert filter != null;
-    
-    Scorer scorer = weight.scorer(context, ScorerContext.def());
-    if (scorer == null) {
-      return;
-    }
+    //System.out.println("IS.searchWithFilter r=" + context.reader);
 
-    int docID = scorer.docID();
-    assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
-
-    // CHECKME: use ConjunctionScorer here?
     DocIdSet filterDocIdSet = filter.getDocIdSet(context);
     if (filterDocIdSet == null) {
       // this means the filter does not accept any documents.
       return;
     }
+
+    ScorerContext scorerContext = ScorerContext.def();
 
-    DocIdSetIterator filterIter = filterDocIdSet.iterator();
-    if (filterIter == null) {
-      // this means the filter does not accept any documents.
+    Bits liveDocs = context.reader.getLiveDocs();
+    Bits randomAccessBits = null;
+
+    if (filterDocIdSet.supportsRandomAccess()) {
+      randomAccessBits = filterDocIdSet.getRandomAccessBits();
+      if (randomAccessBits != null) {
+        // Filter by random-access: we push the filter all the
+        // way down to the atomic scorers, so the bits are
+        // applied just like deleted docs:
+        if (liveDocs != null && !filterDocIdSet.isLiveDocsOnly()) {
+          // TODO: we could swap order of these two if we knew
+          // which is more restrictive?:
+          randomAccessBits = new AndBits(randomAccessBits, liveDocs);
+        }
+        scorerContext = scorerContext.scoreDocsInOrder(!collector.acceptsDocsOutOfOrder()).topScorer(true).acceptOnlyDocs(randomAccessBits);
+      } else {
+        scorerContext = scorerContext.acceptOnlyDocs(liveDocs);
+      }
+    } else if (liveDocs != null) {
+      scorerContext = scorerContext.acceptOnlyDocs(liveDocs);
+    }
+
+    Scorer scorer = weight.scorer(context, scorerContext);
+    if (scorer == null) {
       return;
     }
-    int filterDoc = filterIter.nextDoc();
-    int scorerDoc = scorer.advance(filterDoc);
-    
-    collector.setScorer(scorer);
-    while (true) {
-      if (scorerDoc == filterDoc) {
-        // Check if scorer has exhausted, only before collecting.
-        if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
-          break;
+
+    if (randomAccessBits != null) {
+      collector.setNextReader(context);
+      scorer.score(collector);
+    } else {
+      int docID = scorer.docID();
+      assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS;
+      DocIdSetIterator filterIter = filterDocIdSet.iterator();
+      if (filterIter == null) {
+        // this means the filter does not accept any documents.
+        return;
+      }
+      int filterDoc = filterIter.nextDoc();
+      int scorerDoc = scorer.advance(filterDoc);
+
+      // Filter by iteration:
+      collector.setScorer(scorer);
+      while (true) {
+        if (scorerDoc == filterDoc) {
+          // Check if scorer has exhausted, only before collecting.
+          if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          //System.out.println("  c=" + scorerDoc);
+          collector.collect(scorerDoc);
+          filterDoc = filterIter.nextDoc();
+          scorerDoc = scorer.advance(filterDoc);
+        } else if (scorerDoc > filterDoc) {
+          filterDoc = filterIter.advance(scorerDoc);
+        } else {
+          scorerDoc = scorer.advance(filterDoc);
        }
-        collector.collect(scorerDoc);
-        filterDoc = filterIter.nextDoc();
-        scorerDoc = scorer.advance(filterDoc);
-      } else if (scorerDoc > filterDoc) {
-        filterDoc = filterIter.advance(scorerDoc);
-      } else {
-        scorerDoc = scorer.advance(filterDoc);
       }
     }
   }
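The IndexSearcher hunk above imports org.apache.lucene.util.AndBits, whose source is not part of this excerpt. A minimal implementation consistent with how it is used there (intersecting the filter bits with live docs) might look like:

    // Sketch only: AndBits as used by searchWithFilter above; this is one
    // plausible implementation, not the patch's actual source.
    public final class AndBits implements Bits {
      private final Bits a, b;
      public AndBits(Bits a, Bits b) {
        this.a = a;
        this.b = b;
      }
      @Override
      public boolean get(int index) {
        return a.get(index) && b.get(index); // doc survives only if both accept it
      }
      @Override
      public int length() {
        return a.length();
      }
    }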
- if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { - break; + + if (randomAccessBits != null) { + collector.setNextReader(context); + scorer.score(collector); + } else { + int docID = scorer.docID(); + assert docID == -1 || docID == DocIdSetIterator.NO_MORE_DOCS; + DocIdSetIterator filterIter = filterDocIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. + return; + } + int filterDoc = filterIter.nextDoc(); + int scorerDoc = scorer.advance(filterDoc); + + // Filter by iteration: + collector.setScorer(scorer); + while (true) { + if (scorerDoc == filterDoc) { + // Check if scorer has exhausted, only before collecting. + if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + //System.out.println(" c=" + scorerDoc); + collector.collect(scorerDoc); + filterDoc = filterIter.nextDoc(); + scorerDoc = scorer.advance(filterDoc); + } else if (scorerDoc > filterDoc) { + filterDoc = filterIter.advance(scorerDoc); + } else { + scorerDoc = scorer.advance(filterDoc); } - collector.collect(scorerDoc); - filterDoc = filterIter.nextDoc(); - scorerDoc = scorer.advance(filterDoc); - } else if (scorerDoc > filterDoc) { - filterDoc = filterIter.advance(scorerDoc); - } else { - scorerDoc = scorer.advance(filterDoc); } } } Index: lucene/src/java/org/apache/lucene/search/Weight.java =================================================================== --- lucene/src/java/org/apache/lucene/search/Weight.java (revision 1176852) +++ lucene/src/java/org/apache/lucene/search/Weight.java (working copy) @@ -19,10 +19,9 @@ import java.io.IOException; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.search.similarities.SimilarityProvider; +import org.apache.lucene.util.Bits; /** * Expert: Calculate query weights and build query scorers. @@ -139,9 +138,12 @@ */ public final boolean topScorer; - - private static final ScorerContext DEFAULT_CONTEXT = new ScorerContext(true, false); + /** If non-null, the returned scorer should filter + * according to this {@link Bits} instance. */ + public final Bits acceptOnlyDocs; + private static final ScorerContext DEFAULT_CONTEXT = new ScorerContext(true, false, null); + /** * Returns a default {@link ScorerContext} template initialized with: *

@@ -153,9 +155,10 @@
       return DEFAULT_CONTEXT;
     }
 
-    private ScorerContext(boolean scoreDocsInOrder, boolean topScorer) {
+    private ScorerContext(boolean scoreDocsInOrder, boolean topScorer, Bits acceptOnlyDocs) {
       this.scoreDocsInOrder = scoreDocsInOrder;
       this.topScorer = topScorer;
+      this.acceptOnlyDocs = acceptOnlyDocs;
     }
 
     /**
@@ -169,7 +172,7 @@
       if (this.scoreDocsInOrder == scoreDocsInOrder) {
         return this;
       }
-      return new ScorerContext(scoreDocsInOrder, topScorer);
+      return new ScorerContext(scoreDocsInOrder, topScorer, acceptOnlyDocs);
     }
 
     /**
@@ -183,7 +186,21 @@
       if (this.topScorer == topScorer) {
         return this;
       }
-      return new ScorerContext(scoreDocsInOrder, topScorer);
+      return new ScorerContext(scoreDocsInOrder, topScorer, acceptOnlyDocs);
     }
+
+    /**
+     * Creates and returns a copy of this context with the given value for
+     * {@link #acceptOnlyDocs}: returns a new instance of
+     * {@link ScorerContext} iff the given value differs from the current
+     * {@link #acceptOnlyDocs}. Otherwise, this method has no effect and
+     * returns this instance.
+     */
+    public ScorerContext acceptOnlyDocs(Bits acceptOnlyDocs) {
+      if (this.acceptOnlyDocs == acceptOnlyDocs) {
+        return this;
+      }
+      return new ScorerContext(scoreDocsInOrder, topScorer, acceptOnlyDocs);
+    }
   }
 }
Index: lucene/src/java/org/apache/lucene/util/FixedBitSet.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/FixedBitSet.java	(revision 1176852)
+++ lucene/src/java/org/apache/lucene/util/FixedBitSet.java	(working copy)
@@ -372,6 +372,11 @@
   }
 
   @Override
+  public Bits getRandomAccessBits() {
+    return this;
+  }
+
+  @Override
   public Object clone() {
     return new FixedBitSet(this);
   }
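With the FixedBitSet change above, any filter whose DocIdSet is a FixedBitSet qualifies for the random-access path in IndexSearcher.searchWithFilter, because the set acts as its own Bits. An illustrative condensation of how the pieces connect (only APIs that appear in the hunks above; variable names are illustrative):

  DocIdSet set = filter.getDocIdSet(context);   // e.g. a cached FixedBitSet
  if (set != null && set.supportsRandomAccess()) {
    Bits bits = set.getRandomAccessBits();      // a FixedBitSet returns itself
    if (bits != null) {
      // the scorer then skips filtered-out docs just as it skips deleted docs
      scorerContext = scorerContext.acceptOnlyDocs(bits);
    }
  }

Note that acceptOnlyDocs, like the other ScorerContext setters, returns this same instance when the value is unchanged, so the shared DEFAULT_CONTEXT template is never mutated.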
Index: lucene/src/test-framework/org/apache/lucene/search/CheckHits.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/search/CheckHits.java	(revision 1176852)
+++ lucene/src/test-framework/org/apache/lucene/search/CheckHits.java	(working copy)
@@ -101,10 +101,9 @@
 
     for (int i = -1; i < 2; i++) {
       actual.clear();
-      IndexSearcher s = QueryUtils.wrapUnderlyingReader
-        (random, searcher, i);
+      IndexSearcher s = QueryUtils.wrapUnderlyingReader(random, searcher, i);
       s.search(query, c);
-      Assert.assertEquals("Wrap Reader " + i + ": " +
+      Assert.assertEquals("Wrap Reader " + i + ": s=" + s + " query=" +
                           query.toString(defaultFieldName),
                           correct, actual);
       FieldCache.DEFAULT.purge(s.getIndexReader()); // our wrapping can create insanity otherwise
Index: lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
===================================================================
--- lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java	(revision 1176852)
+++ lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java	(working copy)
@@ -256,7 +256,7 @@
             try {
               if (scorer == null) {
                 Weight w = s.createNormalizedWeight(q);
-                scorer = w.scorer(readerContextArray[leafPtr], ScorerContext.def());
+                scorer = w.scorer(readerContextArray[leafPtr], ScorerContext.def().acceptOnlyDocs(readerContextArray[leafPtr].reader.getLiveDocs()));
               }
 
               int op = order[(opidx[0]++) % order.length];
@@ -301,7 +301,7 @@
           final IndexReader previousReader = lastReader[0];
           IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
           Weight w = indexSearcher.createNormalizedWeight(q);
-          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def());
+          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def().acceptOnlyDocs(previousReader.getLiveDocs()));
           if (scorer != null) {
             boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
             Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
@@ -327,7 +327,7 @@
           final IndexReader previousReader = lastReader[0];
           IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false);
           Weight w = indexSearcher.createNormalizedWeight(q);
-          Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def());
+          Scorer scorer = w.scorer((AtomicReaderContext)previousReader.getTopReaderContext(), ScorerContext.def().acceptOnlyDocs(previousReader.getLiveDocs()));
           if (scorer != null) {
             boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
             Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
@@ -358,7 +358,7 @@
             long startMS = System.currentTimeMillis();
             for (int i=lastDoc[0]+1; i<=doc; i++) {
               Weight w = s.createNormalizedWeight(q);
-              Scorer scorer = w.scorer(context[leafPtr], ScorerContext.def());
+              Scorer scorer = w.scorer(context[leafPtr], ScorerContext.def().acceptOnlyDocs(context[leafPtr].reader.getLiveDocs()));
               Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS);
               Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID());
               float skipToScore = scorer.score();
@@ -385,7 +385,7 @@
           final IndexReader previousReader = lastReader[0];
           IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
           Weight w = indexSearcher.createNormalizedWeight(q);
-          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def());
+          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def().acceptOnlyDocs(previousReader.getLiveDocs()));
           if (scorer != null) {
             boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
             Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
@@ -409,7 +409,7 @@
           final IndexReader previousReader = lastReader[0];
           IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
           Weight w = indexSearcher.createNormalizedWeight(q);
-          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def());
+          Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), ScorerContext.def().acceptOnlyDocs(previousReader.getLiveDocs()));
           if (scorer != null) {
             boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
             Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
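The QueryUtils changes all apply one pattern: a test that obtains a Scorer directly from a Weight must now pass the reader's live docs explicitly, since under this patch deletions are evidently enforced through acceptOnlyDocs rather than inside each scorer. In condensed form (variable names illustrative):

  Weight w = searcher.createNormalizedWeight(query);
  Scorer scorer = w.scorer(leafContext,
      ScorerContext.def().acceptOnlyDocs(leafContext.reader.getLiveDocs()));

A null acceptOnlyDocs (the default) means the scorer may return any doc, which is correct for readers without deletions.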
"low" filtering for search methods + // taking Filter? + + // TODO: maybe fix AssertingIndexSearcher so that if + // reader has deletions, it randomly pretends it does + // not and then passes a filter to all search methods + // instead? + IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(r, ex) { @Override