Index: src/java/org/apache/lucene/search/Scorer.java =================================================================== --- src/java/org/apache/lucene/search/Scorer.java (revision 740806) +++ src/java/org/apache/lucene/search/Scorer.java (working copy) @@ -18,6 +18,7 @@ */ import java.io.IOException; +import org.apache.lucene.util.OpenBitSet; /** * Expert: Common scoring functionality for different types of queries. Index: src/java/org/apache/lucene/search/TermQuery.java =================================================================== --- src/java/org/apache/lucene/search/TermQuery.java (revision 740806) +++ src/java/org/apache/lucene/search/TermQuery.java (working copy) @@ -66,8 +66,10 @@ if (termDocs == null) return null; - return new TermScorer(this, termDocs, similarity, - reader.norms(term.field())); + TermScorer s = new TermScorer(this, termDocs, similarity, + reader.norms(term.field())); + s.term = term; + return s; } public Explanation explain(IndexReader reader, int doc) Index: src/java/org/apache/lucene/search/IndexSearcher.java =================================================================== --- src/java/org/apache/lucene/search/IndexSearcher.java (revision 740806) +++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy) @@ -27,6 +27,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.OpenBitSet; /** Implements search over a single IndexReader. 
* @@ -250,19 +251,47 @@ doSearch(sortedSubReaders[i], weight, filter, collector); } } + + static public boolean allowFilterPush; + + static public OpenBitSet bits; private void doSearch(IndexReader reader, Weight weight, Filter filter, final HitCollector results) throws IOException { + final OpenBitSet bitsLocal; + + if (filter != null) { + bitsLocal = (OpenBitSet) filter.getDocIdSet(reader); + } else { + bitsLocal = null; + } + + // nocommit + if (allowFilterPush) { + bits = bitsLocal; + } else { + bits = null; + } + Scorer scorer = weight.scorer(reader); if (scorer == null) return; - if (filter == null) { + if (allowFilterPush || filter == null) { scorer.score(results); return; } + while (scorer.next()) { + final int docID = scorer.doc(); + if (bitsLocal.fastGet(docID)) { + results.collect(docID, scorer.score()); + } + } + + /* + DocIdSetIterator filterDocIdIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here? boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc()); @@ -281,6 +310,7 @@ } } } + */ } public Query rewrite(Query original) throws IOException { Index: src/java/org/apache/lucene/search/TermScorer.java =================================================================== --- src/java/org/apache/lucene/search/TermScorer.java (revision 740806) +++ src/java/org/apache/lucene/search/TermScorer.java (working copy) @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Term; /** Expert: A Scorer for documents matching a Term. */ @@ -38,6 +39,8 @@ private static final int SCORE_CACHE_SIZE = 32; private float[] scoreCache = new float[SCORE_CACHE_SIZE]; + Term term; + /** Construct a TermScorer. * @param weight The weight of the Term in the query. * @param td An iterator over the documents matching the Term. 
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java =================================================================== --- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 740806) +++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy) @@ -20,6 +20,8 @@ import java.io.IOException; import org.apache.lucene.util.BitVector; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.util.OpenBitSet; class SegmentTermDocs implements TermDocs { protected SegmentReader parent; @@ -33,7 +35,8 @@ private int skipInterval; private int maxSkipLevels; private DefaultSkipListReader skipListReader; - + private final OpenBitSet bits; + private long freqBasePointer; private long proxBasePointer; @@ -46,6 +49,7 @@ protected SegmentTermDocs(SegmentReader parent) { this.parent = parent; this.freqStream = (IndexInput) parent.freqStream.clone(); + this.bits = IndexSearcher.bits; synchronized (parent) { this.deletedDocs = parent.deletedDocs; } @@ -53,9 +57,12 @@ this.maxSkipLevels = parent.tis.getMaxSkipLevels(); } + Term term; + public void seek(Term term) throws IOException { TermInfo ti = parent.tis.get(term); seek(ti, term); + this.term = term; } public void seek(TermEnum termEnum) throws IOException { @@ -124,8 +131,10 @@ count++; - if (deletedDocs == null || !deletedDocs.get(doc)) + if ((deletedDocs == null || !deletedDocs.get(doc)) && + (bits == null || bits.fastGet(doc))) { break; + } skippingDoc(); } return true; @@ -149,7 +158,8 @@ freq = freqStream.readVInt(); // else read freq count++; - if (deletedDocs == null || !deletedDocs.get(doc)) { + if ((deletedDocs == null || !deletedDocs.get(doc)) && + (bits == null || bits.fastGet(doc))) { docs[i] = doc; freqs[i] = freq; ++i; @@ -207,6 +217,7 @@ if (!next()) return false; } while (target > doc); + return true; } } Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java 
=================================================================== --- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 740806) +++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (working copy) @@ -19,6 +19,15 @@ import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.IndexReader; +import java.util.Random; +import java.util.HashMap; +import java.io.IOException; /** * Search task. @@ -28,8 +37,17 @@ */ public class SearchTask extends ReadTask { + private final double fpct; + public SearchTask(PerfRunData runData) { super(runData); + fpct = getRunData().getConfig().get("filter.pct", 0.0); + final boolean ooo = getRunData().getConfig().get("docsoutoforder", false); + System.out.println("OOO=" + ooo); + BooleanQuery.setAllowDocsOutOfOrder(ooo); + final boolean filterPush = getRunData().getConfig().get("filterpush", false); + System.out.println("FILTPUSH=" + filterPush); + IndexSearcher.allowFilterPush = filterPush; } public boolean withRetrieve() { @@ -52,5 +70,66 @@ return getRunData().getQueryMaker(this); } + Filter filter; + private static boolean first2 = true; + public Filter getFilter() { + if (filter == null) { + if (fpct < 100.0) { + System.out.println("FILT=" + fpct); + filter = new MyFilter(fpct); + } else if (first2) { + System.out.println("FILT=none"); + first2 = false; + } + } + return filter; + } + + static final HashMap filters = new HashMap(); + private class MyFilter extends Filter { + private final double fpct; + + MyFilter(double fpct) { + this.fpct = fpct; + } + + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + OpenBitSet bits = 
(OpenBitSet) filters.get(reader); + if (bits == null) { + final int numDocs = reader.maxDoc(); + bits = new OpenBitSet(numDocs); + final Random r = new java.util.Random(17); + int setCount = 0; + final boolean inverted; + final double fpct2; + if (fpct > 50.0) { + fpct2 = 100.0 - fpct; + inverted = true; + } else { + fpct2 = fpct; + inverted = false; + } + + final int target = (int) ((fpct2/100.0)*numDocs); + while(setCount < target) { + setCount += bits.getAndSet(r.nextInt(numDocs)) ? 0:1; + } + if (setCount != bits.cardinality()) { + throw new RuntimeException("count mismatch"); + } + if (inverted) { + OpenBitSet bits2 = new OpenBitSet(numDocs); + for(int i=0;i 0) { if (sort != null) { - hits = searcher.search(q, null, numHits, sort); + hits = searcher.search(q, filter, numHits, sort); } else { - hits = searcher.search(q, numHits); + hits = searcher.search(q, filter, numHits); } //System.out.println("q=" + q + ":" + hits.totalHits + " total hits"); - + if (first) { + System.out.println("NUMHITS=" + hits.totalHits); + System.out.println("q=" + q); + first = false; + } if (withTraverse()) { final ScoreDoc[] scoreDocs = hits.scoreDocs; int traversalSize = Math.min(scoreDocs.length, traversalSize());