Index: src/java/org/apache/lucene/search/Scorer.java
===================================================================
--- src/java/org/apache/lucene/search/Scorer.java (revision 740806)
+++ src/java/org/apache/lucene/search/Scorer.java (working copy)
@@ -18,6 +18,7 @@
*/
import java.io.IOException;
+import org.apache.lucene.util.OpenBitSet;
/**
* Expert: Common scoring functionality for different types of queries.
Index: src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/TermQuery.java (revision 740806)
+++ src/java/org/apache/lucene/search/TermQuery.java (working copy)
@@ -66,8 +66,10 @@
if (termDocs == null)
return null;
- return new TermScorer(this, termDocs, similarity,
- reader.norms(term.field()));
+ TermScorer s = new TermScorer(this, termDocs, similarity,
+ reader.norms(term.field()));
+ s.term = term;
+ return s;
}
public Explanation explain(IndexReader reader, int doc)
Index: src/java/org/apache/lucene/search/IndexSearcher.java
===================================================================
--- src/java/org/apache/lucene/search/IndexSearcher.java (revision 740806)
+++ src/java/org/apache/lucene/search/IndexSearcher.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.OpenBitSet;
/** Implements search over a single IndexReader.
*
@@ -250,19 +251,47 @@
doSearch(sortedSubReaders[i], weight, filter, collector);
}
}
+
+ static public boolean allowFilterPush; // nocommit switch: when true, doSearch publishes the filter bits through the static below and calls scorer.score(results) without a per-hit filter check
+
+ static public OpenBitSet bits; // assigned in doSearch before weight.scorer(reader); read by the SegmentTermDocs constructor. NOTE(review): shared static, looks unsafe if searches run concurrently - confirm
private void doSearch(IndexReader reader, Weight weight, Filter filter,
final HitCollector results) throws IOException {
+ final OpenBitSet bitsLocal;
+
+ if (filter != null) {
+ bitsLocal = (OpenBitSet) filter.getDocIdSet(reader);
+ } else {
+ bitsLocal = null;
+ }
+
+ // nocommit
+ if (allowFilterPush) {
+ bits = bitsLocal;
+ } else {
+ bits = null;
+ }
+
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return;
- if (filter == null) {
+ if (allowFilterPush || filter == null) {
scorer.score(results);
return;
}
+ while (scorer.next()) {
+ final int docID = scorer.doc();
+ if (bitsLocal.fastGet(docID)) {
+ results.collect(docID, scorer.score());
+ }
+ }
+
+ /*
+
DocIdSetIterator filterDocIdIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here?
boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc());
@@ -281,6 +310,7 @@
}
}
}
+ */
}
public Query rewrite(Query original) throws IOException {
Index: src/java/org/apache/lucene/search/TermScorer.java
===================================================================
--- src/java/org/apache/lucene/search/TermScorer.java (revision 740806)
+++ src/java/org/apache/lucene/search/TermScorer.java (working copy)
@@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.Term;
/** Expert: A Scorer for documents matching a Term.
*/
@@ -38,6 +39,8 @@
private static final int SCORE_CACHE_SIZE = 32;
private float[] scoreCache = new float[SCORE_CACHE_SIZE];
+ Term term;
+
/** Construct a TermScorer.
* @param weight The weight of the Term in the query.
* @param td An iterator over the documents matching the Term.
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 740806)
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy)
@@ -20,6 +20,8 @@
import java.io.IOException;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.util.OpenBitSet;
class SegmentTermDocs implements TermDocs {
protected SegmentReader parent;
@@ -33,7 +35,8 @@
private int skipInterval;
private int maxSkipLevels;
private DefaultSkipListReader skipListReader;
-
+ private final OpenBitSet bits;
+
private long freqBasePointer;
private long proxBasePointer;
@@ -46,6 +49,7 @@
protected SegmentTermDocs(SegmentReader parent) {
this.parent = parent;
this.freqStream = (IndexInput) parent.freqStream.clone();
+ this.bits = IndexSearcher.bits;
synchronized (parent) {
this.deletedDocs = parent.deletedDocs;
}
@@ -53,9 +57,12 @@
this.maxSkipLevels = parent.tis.getMaxSkipLevels();
}
+ Term term;
+
public void seek(Term term) throws IOException {
TermInfo ti = parent.tis.get(term);
seek(ti, term);
+ this.term = term; // remembers the term passed to this overload; nothing in the visible hunks reads it back
}
public void seek(TermEnum termEnum) throws IOException {
@@ -124,8 +131,10 @@
count++;
- if (deletedDocs == null || !deletedDocs.get(doc))
+ if ((deletedDocs == null || !deletedDocs.get(doc)) &&
+ (bits == null || bits.fastGet(doc))) { // filter test grouped (as in read()): && binds tighter than ||, so ungrouped it let deleted docs through and dereferenced a null bits
break;
+ }
skippingDoc();
}
return true;
@@ -149,7 +158,8 @@
freq = freqStream.readVInt(); // else read freq
count++;
- if (deletedDocs == null || !deletedDocs.get(doc)) {
+ if ((deletedDocs == null || !deletedDocs.get(doc)) &&
+ (bits == null || bits.fastGet(doc))) {
docs[i] = doc;
freqs[i] = freq;
++i;
@@ -207,6 +217,7 @@
if (!next())
return false;
} while (target > doc);
+
return true;
}
}
Index: contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java
===================================================================
--- contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (revision 740806)
+++ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTask.java (working copy)
@@ -19,6 +19,15 @@
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.index.IndexReader;
+import java.util.Random;
+import java.util.HashMap;
+import java.io.IOException;
/**
* Search task.
@@ -28,8 +37,17 @@
*/
public class SearchTask extends ReadTask {
+ private final double fpct;
+
public SearchTask(PerfRunData runData) {
super(runData);
+ fpct = getRunData().getConfig().get("filter.pct", 0.0); // percentage consumed by getFilter(); NOTE(review): with the 0.0 default getFilter() still builds a MyFilter (0.0 < 100.0) - confirm 100.0 was not the intended default
+ final boolean ooo = getRunData().getConfig().get("docsoutoforder", false);
+ System.out.println("OOO=" + ooo);
+ BooleanQuery.setAllowDocsOutOfOrder(ooo); // invoked on the class, so the setting is not scoped to this task instance
+ final boolean filterPush = getRunData().getConfig().get("filterpush", false);
+ System.out.println("FILTPUSH=" + filterPush);
+ IndexSearcher.allowFilterPush = filterPush; // writes a static field: the most recently constructed SearchTask decides the value
}
public boolean withRetrieve() {
@@ -52,5 +70,66 @@
return getRunData().getQueryMaker(this);
}
+ Filter filter;
+ private static boolean first2 = true;
+ public Filter getFilter() { // lazily creates the filter on first call and caches it in the field; returns null when fpct >= 100.0
+ if (filter == null) {
+ if (fpct < 100.0) {
+ System.out.println("FILT=" + fpct);
+ filter = new MyFilter(fpct);
+ } else if (first2) { // first2 is static, so "FILT=none" is logged once rather than on every call (not synchronized)
+ System.out.println("FILT=none");
+ first2 = false;
+ }
+ }
+ return filter;
+ }
+
+ static final HashMap filters = new HashMap();
+ private class MyFilter extends Filter {
+ private final double fpct;
+
+ MyFilter(double fpct) {
+ this.fpct = fpct;
+ }
+
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ OpenBitSet bits = (OpenBitSet) filters.get(reader);
+ if (bits == null) {
+ final int numDocs = reader.maxDoc();
+ bits = new OpenBitSet(numDocs);
+ final Random r = new java.util.Random(17);
+ int setCount = 0;
+ final boolean inverted;
+ final double fpct2;
+ if (fpct > 50.0) {
+ fpct2 = 100.0 - fpct;
+ inverted = true;
+ } else {
+ fpct2 = fpct;
+ inverted = false;
+ }
+
+ final int target = (int) ((fpct2/100.0)*numDocs);
+ while(setCount < target) {
+ setCount += bits.getAndSet(r.nextInt(numDocs)) ? 0:1;
+ }
+ if (setCount != bits.cardinality()) {
+ throw new RuntimeException("count mismatch");
+ }
+ if (inverted) {
+ OpenBitSet bits2 = new OpenBitSet(numDocs);
+ for(int i=0;i 0) {
if (sort != null) {
- hits = searcher.search(q, null, numHits, sort);
+ hits = searcher.search(q, filter, numHits, sort);
} else {
- hits = searcher.search(q, numHits);
+ hits = searcher.search(q, filter, numHits);
}
//System.out.println("q=" + q + ":" + hits.totalHits + " total hits");
-
+ if (first) {
+ System.out.println("NUMHITS=" + hits.totalHits);
+ System.out.println("q=" + q);
+ first = false;
+ }
if (withTraverse()) {
final ScoreDoc[] scoreDocs = hits.scoreDocs;
int traversalSize = Math.min(scoreDocs.length, traversalSize());