diff -ruN -x .svn -x build ../trunk/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
--- ../trunk/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2013-10-22 12:23:24.145395121 -0400
+++ lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2014-01-28 05:35:34.432944181 -0500
@@ -31,6 +31,13 @@
* are no docs that match. */
public abstract DocIdSetIterator iterator() throws IOException;
+ // TODO: somehow this class should express the cost of
+ // iteration vs the cost of random access Bits; for
+ // expensive Filters (e.g. distance < 1 km) we should use
+ // bits() after all other Query/Filters have matched, but
+ // this is the opposite of what bits() is for now
+ // (down-low filtering)
+
/** Optionally provides a {@link Bits} interface for random access
* to matching documents.
* @return {@code null}, if this {@code DocIdSet} does not support random access.
diff -ruN -x .svn -x build ../trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
--- ../trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2013-07-15 15:52:15.585877445 -0400
+++ lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2014-01-29 12:08:24.746002360 -0500
@@ -50,7 +50,7 @@
* @param query Query to be filtered, cannot be null.
* @param filter Filter to apply to query results, cannot be null.
*/
- public FilteredQuery (Query query, Filter filter) {
+ public FilteredQuery(Query query, Filter filter) {
this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
}
@@ -63,7 +63,7 @@
*
* @see FilterStrategy
*/
- public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) {
+ public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
if (query == null || filter == null)
throw new IllegalArgumentException("Query and filter cannot be null.");
if (strategy == null)
@@ -118,7 +118,9 @@
// return this query
@Override
- public Query getQuery() { return FilteredQuery.this; }
+ public Query getQuery() {
+ return FilteredQuery.this;
+ }
// return a filtering scorer
@Override
@@ -130,8 +132,8 @@
// this means the filter does not accept any documents.
return null;
}
+
return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet);
-
}
};
}
@@ -183,14 +185,12 @@
@Override
public int advance(int target) throws IOException {
-
int doc = scorer.advance(target);
if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) {
return scorerDoc = nextDoc();
} else {
return scorerDoc = doc;
}
-
}
@Override
@@ -303,7 +303,9 @@
}
@Override
- public final int freq() throws IOException { return scorer.freq(); }
+ public final int freq() throws IOException {
+ return scorer.freq();
+ }
@Override
public final Collection getChildren() {
@@ -343,15 +345,6 @@
public Query rewrite(IndexReader reader) throws IOException {
final Query queryRewritten = query.rewrite(reader);
- if (queryRewritten instanceof MatchAllDocsQuery) {
- // Special case: If the query is a MatchAllDocsQuery, we only
- // return a CSQ(filter).
- final Query rewritten = new ConstantScoreQuery(filter);
- // Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
- rewritten.setBoost(this.getBoost() * queryRewritten.getBoost());
- return rewritten;
- }
-
if (queryRewritten != query) {
// rewrite to a new FilteredQuery wrapping the rewritten query
final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy);
@@ -527,7 +520,7 @@
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
- final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
+ final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
diff -ruN -x .svn -x build ../trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java
--- ../trunk/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java 2013-03-20 06:26:07.127245398 -0400
+++ lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java 2014-01-30 17:58:43.167128795 -0500
@@ -375,7 +375,6 @@
public void testRewrite() throws Exception {
assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
assertRewrite(new FilteredQuery(new PrefixQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
- assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), ConstantScoreQuery.class);
}
public void testGetFilterStrategy() {
diff -ruN -x .svn -x build ../trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java
--- ../trunk/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-04 05:41:02.791645262 -0500
+++ lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-31 07:09:37.437859355 -0500
@@ -29,18 +29,24 @@
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.facet.DrillDownQuery;
+import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
@@ -59,6 +65,15 @@
private final Directory indexDir = new RAMDirectory();
private IndexSearcher searcher;
+ private final FacetsConfig config = new FacetsConfig();
+
+ public final static double ORIGIN_LATITUDE = 40.7143528;
+ public final static double ORIGIN_LONGITUDE = -74.0059731;
+
+ // NOTE: this is approximate, because the earth is a bit
+ // wider at the equator than the poles. See
+ // http://en.wikipedia.org/wiki/Earth_radius
+ public final static double EARTH_RADIUS_KM = 6371.01;
/** Empty constructor */
public DistanceFacetsExample() {}
@@ -68,6 +83,8 @@
IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
new WhitespaceAnalyzer(FacetExamples.EXAMPLES_VER)));
+ // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter
+
// Add documents with latitude/longitude location:
Document doc = new Document();
doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
@@ -92,7 +109,8 @@
private ValueSource getDistanceValueSource() {
Expression distance;
try {
- distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
+ distance = JavascriptCompiler.compile(
+ "haversin(" + ORIGIN_LATITUDE + "," + ORIGIN_LONGITUDE + ",latitude,longitude)");
} catch (ParseException pe) {
// Should not happen
throw new RuntimeException(pe);
@@ -104,15 +122,80 @@
return distance.getValueSource(bindings);
}
+ /** Given a latitude and longitude (in degrees) and the
+ * maximum great circle (surface of the earth) distance,
+ * returns a simple Filter bounding box to "fast match"
+ * candidates. */
+ public static Filter getBoundingBoxFilter(double originLat, double originLng, double maxDistanceKM) {
+
+ // Basic bounding box geo math from http://JanMatuschek.de/LatitudeLongitudeBoundingCoordinates
+
+ // TODO: maybe switch to recursive prefix tree instead
+ // (in lucene/spatial)? It should be more efficient
+ // since it's a 2D trie...
+
+ // Degrees -> Radians:
+ double originLatRadians = Math.toRadians(originLat);
+ double originLngRadians = Math.toRadians(originLng);
+
+ double angle = maxDistanceKM / EARTH_RADIUS_KM;
+
+ double minLat = originLatRadians - angle;
+ double maxLat = originLatRadians + angle;
+
+ double minLng;
+ double maxLng;
+ if (minLat > Math.toRadians(-90) && maxLat < Math.toRadians(90)) {
+ double delta = Math.asin(Math.sin(angle)/Math.cos(originLatRadians));
+ minLng = originLngRadians - delta;
+ if (minLng < Math.toRadians(-180)) {
+ minLng += 2 * Math.PI;
+ }
+ maxLng = originLngRadians + delta;
+ if (maxLng > Math.toRadians(180)) {
+ maxLng -= 2 * Math.PI;
+ }
+ } else {
+ // The query includes a pole!
+ minLat = Math.max(minLat, Math.toRadians(-90));
+ maxLat = Math.min(maxLat, Math.toRadians(90));
+ minLng = Math.toRadians(-180);
+ maxLng = Math.toRadians(180);
+ }
+
+ BooleanFilter f = new BooleanFilter();
+
+ // Add latitude range filter:
+ f.add(NumericRangeFilter.newDoubleRange("latitude", Math.toDegrees(minLat), Math.toDegrees(maxLat), true, true),
+ BooleanClause.Occur.MUST);
+
+ // Add longitude range filter:
+ if (minLng > maxLng) {
+ // The bounding box crosses the international date
+ // line:
+ BooleanFilter lonF = new BooleanFilter();
+ lonF.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), null, true, true),
+ BooleanClause.Occur.SHOULD);
+ lonF.add(NumericRangeFilter.newDoubleRange("longitude", null, Math.toDegrees(maxLng), true, true),
+ BooleanClause.Occur.SHOULD);
+ f.add(lonF, BooleanClause.Occur.MUST);
+ } else {
+ f.add(NumericRangeFilter.newDoubleRange("longitude", Math.toDegrees(minLng), Math.toDegrees(maxLng), true, true),
+ BooleanClause.Occur.MUST);
+ }
+
+ return f;
+ }
+
/** User runs a query and counts facets. */
public FacetResult search() throws IOException {
-
FacetsCollector fc = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), fc);
Facets facets = new DoubleRangeFacetCounts("field", getDistanceValueSource(), fc,
+ getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, 10.0),
ONE_KM,
TWO_KM,
FIVE_KM,
@@ -127,10 +210,16 @@
// Passing no baseQuery means we drill down on all
// documents ("browse only"):
DrillDownQuery q = new DrillDownQuery(null);
-
- q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource())));
-
- return searcher.search(q, 10);
+ final ValueSource vs = getDistanceValueSource();
+ q.add("field", range.getFilter(getBoundingBoxFilter(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
+ DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
+ @Override
+ protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
+ assert drillSideways.length == 1;
+ return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
+ }
+ };
+ return ds.search(q, 10).hits;
}
@Override
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-04 05:41:29.375644548 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-28 06:31:59.728853622 -0500
@@ -18,22 +18,20 @@
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
-import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -86,7 +84,7 @@
/** Used by DrillSideways */
DrillDownQuery(FacetsConfig config, Query baseQuery, List clauses, Map drillDownDims) {
- this.query = new BooleanQuery(true);
+ query = new BooleanQuery(true);
if (baseQuery != null) {
query.add(baseQuery, Occur.MUST);
}
@@ -155,11 +153,12 @@
/** Expert: add a custom drill-down subQuery. Use this
* when you have a separate way to drill-down on the
- * dimension than the indexed facet ordinals (for
- * example, use a {@link NumericRangeQuery} to drill down
- * after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */
+ * dimension than the indexed facet ordinals. */
public void add(String dim, Query subQuery) {
+ if (drillDownDims.containsKey(dim)) {
+ throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
+ }
// TODO: we should use FilteredQuery?
// So scores of the drill-down query don't have an
@@ -172,6 +171,40 @@
drillDownDims.put(dim, drillDownDims.size());
}
+ /** Expert: add a custom drill-down Filter, e.g. when
+ * drilling down after range faceting. */
+ public void add(String dim, Filter subFilter) {
+
+ if (drillDownDims.containsKey(dim)) {
+ throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
+ }
+
+ // TODO: we should use FilteredQuery?
+
+ // So scores of the drill-down query don't have an
+ // effect:
+ final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subFilter);
+ drillDownQuery.setBoost(0.0f);
+
+ query.add(drillDownQuery, Occur.MUST);
+
+ drillDownDims.put(dim, drillDownDims.size());
+ }
+
+ static Filter getFilter(Query query) {
+ if (query instanceof ConstantScoreQuery) {
+ ConstantScoreQuery csq = (ConstantScoreQuery) query;
+ Filter filter = csq.getFilter();
+ if (filter != null) {
+ return filter;
+ } else {
+ return getFilter(csq.getQuery());
+ }
+ } else {
+ return null;
+ }
+ }
+
@Override
public DrillDownQuery clone() {
return new DrillDownQuery(config, query, drillDownDims);
@@ -199,7 +232,63 @@
if (query.clauses().size() == 0) {
return new MatchAllDocsQuery();
}
- return query;
+
+ List filters = new ArrayList();
+ List queries = new ArrayList();
+ List clauses = query.clauses();
+ Query baseQuery;
+ int startIndex;
+ if (drillDownDims.size() == query.clauses().size()) {
+ baseQuery = new MatchAllDocsQuery();
+ startIndex = 0;
+ } else {
+ baseQuery = clauses.get(0).getQuery();
+ startIndex = 1;
+ }
+
+ for(int i=startIndex;i weightToIndex = new IdentityHashMap();
-
- private Scorer mainScorer;
-
- public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
- Map dims) {
- this.hitCollector = hitCollector;
- this.drillDownCollector = drillDownCollector;
- this.drillSidewaysCollectors = drillSidewaysCollectors;
- subScorers = new Scorer[dims.size()];
-
- if (dims.size() == 1) {
- // When we have only one dim, we insert the
- // MatchAllDocsQuery, bringing the clause count to
- // 2:
- exactCount = 2;
- } else {
- exactCount = dims.size();
- }
- }
-
- @Override
- public void collect(int doc) throws IOException {
- //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);
-
- if (mainScorer == null) {
- // This segment did not have any docs with any
- // drill-down field & value:
- return;
- }
-
- if (mainScorer.freq() == exactCount) {
- // All sub-clauses from the drill-down filters
- // matched, so this is a "real" hit, so we first
- // collect in both the hitCollector and the
- // drillDown collector:
- //System.out.println(" hit " + drillDownCollector);
- hitCollector.collect(doc);
- if (drillDownCollector != null) {
- drillDownCollector.collect(doc);
- }
-
- // Also collect across all drill-sideways counts so
- // we "merge in" drill-down counts for this
- // dimension.
- for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc;
- drillSidewaysCollectors[i].collect(doc);
- assert allMatchesFrom(i+1, doc);
- found = true;
- break;
- }
- }
- assert found;
- }
- }
-
- // Only used by assert:
- private boolean allMatchesFrom(int startFrom, int doc) {
- for(int i=startFrom;i drillDownDims = ddq.getDims();
-
- BooleanQuery topQuery = new BooleanQuery(true);
- final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
- drillDownDims);
-
- // TODO: if query is already a BQ we could copy that and
- // add clauses to it, instead of doing BQ inside BQ
- // (should be more efficient)? Problem is this can
- // affect scoring (coord) ... too bad we can't disable
- // coord on a clause by clause basis:
- topQuery.add(baseQuery, BooleanClause.Occur.MUST);
-
- // NOTE: in theory we could just make a single BQ, with
- // +query a b c minShouldMatch=2, but in this case,
- // annoyingly, BS2 wraps a sub-scorer that always
- // returns 2 as the .freq(), not how many of the
- // SHOULD clauses matched:
- BooleanQuery subQuery = new BooleanQuery(true);
-
- Query wrappedSubQuery = new QueryWrapper(subQuery,
- new SetWeight() {
- @Override
- public void set(Weight w) {
- collector.setWeight(w, -1);
- }
- });
- Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
-
- // Don't impact score of original query:
- constantScoreSubQuery.setBoost(0.0f);
-
- topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
-
- // Unfortunately this sub-BooleanQuery
- // will never get BS1 because today BS1 only works
- // if topScorer=true... and actually we cannot use BS1
- // anyways because we need subDocsScoredAtOnce:
- int dimIndex = 0;
+ Query[] drillDownQueries = new Query[clauses.length-startClause];
for(int i=startClause;i 1 || (nullCount == 1 && dims.length == 1)) {
+ // If more than one dim has no matches, then there
+ // are no hits nor drill-sideways counts. Or, if we
+ // have only one dim and that dim has no matches,
+ // same thing.
+ //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
+ if (nullCount > 1) {
return null;
}
// Sort drill-downs by most restrictive first:
Arrays.sort(dims);
- // TODO: it could be better if we take acceptDocs
- // into account instead of baseScorer?
- Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
-
if (baseScorer == null) {
return null;
}
return new DrillSidewaysScorer(this, context,
- baseScorer,
- drillDownCollector, dims);
+ baseScorer,
+ drillDownCollector, dims,
+ scoreSubDocsAtOnce);
}
};
}
@@ -174,7 +209,7 @@
result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
result = prime * result
+ ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
- result = prime * result + Arrays.hashCode(drillDownTerms);
+ result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
return result;
}
@@ -191,7 +226,7 @@
if (drillDownCollector == null) {
if (other.drillDownCollector != null) return false;
} else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
- if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+ if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false;
if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
return true;
}
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-04 05:41:31.727644481 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-29 06:37:36.850533308 -0500
@@ -22,10 +22,11 @@
import java.util.Collections;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
class DrillSidewaysScorer extends Scorer {
@@ -34,13 +35,15 @@
private final Collector drillDownCollector;
- private final DocsEnumsAndFreq[] dims;
+ private final DocsAndCost[] dims;
// DrillDown DocsEnums:
private final Scorer baseScorer;
private final AtomicReaderContext context;
+ final boolean scoreSubDocsAtOnce;
+
private static final int CHUNK = 2048;
private static final int MASK = CHUNK-1;
@@ -48,12 +51,13 @@
private float collectScore;
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
- DocsEnumsAndFreq[] dims) {
+ DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
super(w);
this.dims = dims;
this.context = context;
this.baseScorer = baseScorer;
this.drillDownCollector = drillDownCollector;
+ this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@@ -67,7 +71,7 @@
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
}
- for(DocsEnumsAndFreq dim : dims) {
+ for (DocsAndCost dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
}
@@ -79,26 +83,38 @@
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
- for(DocsEnumsAndFreq dim : dims) {
- for (DocsEnum docsEnum : dim.docsEnums) {
- if (docsEnum != null) {
- docsEnum.nextDoc();
- }
+ int numBits = 0;
+ for (DocsAndCost dim : dims) {
+ if (dim.disi != null) {
+ dim.disi.nextDoc();
+ } else if (dim.bits != null) {
+ numBits++;
}
}
final int numDims = dims.length;
- DocsEnum[][] docsEnums = new DocsEnum[numDims][];
- Collector[] sidewaysCollectors = new Collector[numDims];
+ Bits[] bits = new Bits[numBits];
+ Collector[] bitsSidewaysCollectors = new Collector[numBits];
+
+ DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits];
+ Collector[] sidewaysCollectors = new Collector[numDims-numBits];
long drillDownCost = 0;
- for(int dim=0;dim 1 && (dims[1].maxCost < baseQueryCost/10)) {
+ if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) {
+ //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length);
+ doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
+ } else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) {
//System.out.println("drillDownAdvance");
- doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+ doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
} else {
//System.out.println("union");
- doUnionScoring(collector, docsEnums, sidewaysCollectors);
+ doUnionScoring(collector, disis, sidewaysCollectors);
+ }
+ }
+
+ /** Used when base query is highly constraining vs the
+ * drilldowns, or when the docs must be scored at once
+ * (i.e., like BooleanScorer2, not BooleanScorer). In
+ * this case we just .next() on base and .advance() on
+ * the dim filters. */
+ private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors,
+ Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException {
+ //if (DEBUG) {
+ // System.out.println(" doQueryFirstScoring");
+ //}
+ int docID = baseScorer.docID();
+
+ nextDoc: while (docID != NO_MORE_DOCS) {
+ Collector failedCollector = null;
+ for (int i=0;i docID) {
+ if (failedCollector != null) {
+ // More than one dim fails on this document, so
+ // it's neither a hit nor a near-miss; move to
+ // next doc:
+ docID = baseScorer.nextDoc();
+ continue nextDoc;
+ } else {
+ failedCollector = sidewaysCollectors[i];
+ }
+ }
+ }
+
+ // TODO: for the "non-costly Bits" we really should
+ // have passed them down as acceptDocs, but
+ // unfortunately we cannot distinguish today between
+ // "bits() is so costly that you should apply it last"
+ // from "bits() is so cheap that you should apply it
+ // everywhere down low"
+
+ // Fold in Filter Bits last, since they may be costly:
+ for(int i=0;i= dim) {
@@ -299,8 +383,9 @@
counts[slot] = dim+1;
}
}
+
// TODO: sometimes use advance?
- docID = docsEnum.nextDoc();
+ docID = disi.nextDoc();
}
}
}
@@ -309,7 +394,7 @@
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
- for(int i=0;i {
- DocsEnum[] docsEnums;
- // Max cost for all docsEnums for this dim:
- long maxCost;
+ static class DocsAndCost implements Comparable {
+ // Iterator for docs matching this dim's filter, or ...
+ DocIdSetIterator disi;
+ // Random access bits:
+ Bits bits;
Collector sidewaysCollector;
String dim;
@Override
- public int compareTo(DocsEnumsAndFreq other) {
- if (maxCost < other.maxCost) {
+ public int compareTo(DocsAndCost other) {
+ if (disi == null) {
+ if (other.disi == null) {
+ return 0;
+ } else {
+ return 1;
+ }
+ } else if (other.disi == null) {
+ return -1;
+ } else if (disi.cost() < other.disi.cost()) {
return -1;
- } else if (maxCost > other.maxCost) {
+ } else if (disi.cost() > other.disi.cost()) {
return 1;
} else {
return 0;
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java 2014-01-06 06:08:26.778978702 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java 2014-01-30 11:20:29.291767993 -0500
@@ -24,12 +24,15 @@
import org.apache.lucene.document.DoubleDocValuesField; // javadocs
import org.apache.lucene.document.FloatDocValuesField; // javadocs
import org.apache.lucene.facet.Facets;
-import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
/** {@link Facets} implementation that computes counts for
@@ -60,7 +63,16 @@
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException {
- super(field, ranges);
+ this(field, valueSource, hits, null, ranges);
+ }
+
+ /** Create {@code RangeFacetCounts}, using the provided
+ * {@link ValueSource}, and using the provided Filter as
+ * a fastmatch: only documents passing the filter are
+ * checked for the matching ranges. The filter must be
+ * random access (implement {@link DocIdSet#bits}). */
+ public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, DoubleRange... ranges) throws IOException {
+ super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}
@@ -84,7 +96,26 @@
final int length = hits.bits.length();
int doc = 0;
totCount += hits.totalHits;
+ Bits bits;
+ if (fastMatchFilter != null) {
+ DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
+ if (dis == null) {
+ // No documents match
+ continue;
+ }
+ bits = dis.bits();
+ if (bits == null) {
+ throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+ }
+ } else {
+ bits = null;
+ }
+
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
+ if (bits != null && bits.get(doc) == false) {
+ doc++;
+ continue;
+ }
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc)));
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-04 05:41:42.563644195 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-30 11:11:22.759782610 -0500
@@ -26,11 +26,12 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
-/** Represents a range over double values. */
+/** Represents a range over double values.
+ *
+ * @lucene.experimental */
public final class DoubleRange extends Range {
final double minIncl;
final double maxIncl;
@@ -99,14 +100,15 @@
return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
}
- /** Returns a new {@link Filter} accepting only documents
- * in this range. Note that this filter is not
- * efficient: it's a linear scan of all docs, testing
- * each value. If the {@link ValueSource} is static,
- * e.g. an indexed numeric field, then it's more
- * efficient to use {@link NumericRangeFilter}. */
- public Filter getFilter(final ValueSource valueSource) {
+ @Override
+ public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {
+
+ @Override
+ public String toString() {
+ return "Filter(" + DoubleRange.this.toString() + ")";
+ }
+
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@@ -119,49 +121,48 @@
final int maxDoc = context.reader().maxDoc();
+ final Bits fastMatchBits;
+ if (fastMatchFilter != null) {
+ DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
+ if (dis == null) {
+ // No documents match
+ return null;
+ }
+ fastMatchBits = dis.bits();
+ if (fastMatchBits == null) {
+ throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+ }
+ } else {
+ fastMatchBits = null;
+ }
+
return new DocIdSet() {
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- int doc = -1;
-
+ public Bits bits() {
+ return new Bits() {
@Override
- public int nextDoc() throws IOException {
- while (true) {
- doc++;
- if (doc == maxDoc) {
- return doc = NO_MORE_DOCS;
- }
- if (acceptDocs != null && acceptDocs.get(doc) == false) {
- continue;
- }
- double v = values.doubleVal(doc);
- if (accept(v)) {
- return doc;
- }
+ public boolean get(int docID) {
+ if (acceptDocs != null && acceptDocs.get(docID) == false) {
+ return false;
}
+ if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
+ return false;
+ }
+ return accept(values.doubleVal(docID));
}
@Override
- public int advance(int target) throws IOException {
- doc = target-1;
- return nextDoc();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public long cost() {
- // Since we do a linear scan over all
- // documents, our cost is O(maxDoc):
+ public int length() {
return maxDoc;
}
};
}
+
+ @Override
+ public DocIdSetIterator iterator() {
+ throw new UnsupportedOperationException("this filter can only be accessed via bits()");
+ }
};
}
};
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java 2014-01-06 06:08:27.070978691 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java 2014-01-30 11:20:17.851768299 -0500
@@ -22,11 +22,14 @@
import java.util.List;
import org.apache.lucene.facet.Facets;
-import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
/** {@link Facets} implementation that computes counts for
* dynamic long ranges from a provided {@link ValueSource},
@@ -49,7 +52,16 @@
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
- super(field, ranges);
+ this(field, valueSource, hits, null, ranges);
+ }
+
+ /** Create {@code RangeFacetCounts}, using the provided
+ * {@link ValueSource}, and using the provided Filter as
+ * a fast-match filter: only documents accepted by the
+ * filter are tested against the ranges. The filter must
+ * support random access (implement {@link DocIdSet#bits}). */
+ public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, Filter fastMatchFilter, LongRange... ranges) throws IOException {
+ super(field, ranges, fastMatchFilter);
count(valueSource, hits.getMatchingDocs());
}
@@ -65,7 +77,26 @@
final int length = hits.bits.length();
int doc = 0;
totCount += hits.totalHits;
+ Bits bits;
+ if (fastMatchFilter != null) {
+ DocIdSet dis = fastMatchFilter.getDocIdSet(hits.context, null);
+ if (dis == null) {
+ // No documents match
+ continue;
+ }
+ bits = dis.bits();
+ if (bits == null) {
+ throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+ }
+ } else {
+ bits = null;
+ }
+
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
+ if (bits != null && bits.get(doc) == false) {
+ doc++;
+ continue;
+ }
// Skip missing docs:
if (fv.exists(doc)) {
counter.add(fv.longVal(doc));
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-04 05:41:41.555644222 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-30 11:11:21.415782645 -0500
@@ -26,10 +26,11 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
-/** Represents a range over long values. */
+/** Represents a range over long values.
+ *
+ * @lucene.experimental */
public final class LongRange extends Range {
final long minIncl;
final long maxIncl;
@@ -91,14 +92,15 @@
return "LongRange(" + minIncl + " to " + maxIncl + ")";
}
- /** Returns a new {@link Filter} accepting only documents
- * in this range. Note that this filter is not
- * efficient: it's a linear scan of all docs, testing
- * each value. If the {@link ValueSource} is static,
- * e.g. an indexed numeric field, then it's more
- * efficient to use {@link NumericRangeFilter}. */
- public Filter getFilter(final ValueSource valueSource) {
+ @Override
+ public Filter getFilter(final Filter fastMatchFilter, final ValueSource valueSource) {
return new Filter() {
+
+ @Override
+ public String toString() {
+ return "Filter(" + LongRange.this.toString() + ")";
+ }
+
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@@ -111,49 +113,48 @@
final int maxDoc = context.reader().maxDoc();
+ final Bits fastMatchBits;
+ if (fastMatchFilter != null) {
+ DocIdSet dis = fastMatchFilter.getDocIdSet(context, null);
+ if (dis == null) {
+ // No documents match
+ return null;
+ }
+ fastMatchBits = dis.bits();
+ if (fastMatchBits == null) {
+ throw new IllegalArgumentException("fastMatchFilter does not implement DocIdSet.bits");
+ }
+ } else {
+ fastMatchBits = null;
+ }
+
return new DocIdSet() {
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- int doc = -1;
-
+ public Bits bits() {
+ return new Bits() {
@Override
- public int nextDoc() throws IOException {
- while (true) {
- doc++;
- if (doc == maxDoc) {
- return doc = NO_MORE_DOCS;
- }
- if (acceptDocs != null && acceptDocs.get(doc) == false) {
- continue;
- }
- long v = values.longVal(doc);
- if (accept(v)) {
- return doc;
- }
+ public boolean get(int docID) {
+ if (acceptDocs != null && acceptDocs.get(docID) == false) {
+ return false;
}
+ if (fastMatchBits != null && fastMatchBits.get(docID) == false) {
+ return false;
+ }
+ return accept(values.longVal(docID));
}
@Override
- public int advance(int target) throws IOException {
- doc = target-1;
- return nextDoc();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public long cost() {
- // Since we do a linear scan over all
- // documents, our cost is O(maxDoc):
+ public int length() {
return maxDoc;
}
};
}
+
+ @Override
+ public DocIdSetIterator iterator() {
+ throw new UnsupportedOperationException("this filter can only be accessed via bits()");
+ }
};
}
};
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java 2014-01-04 05:41:41.359644451 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetCounts.java 2014-01-30 16:19:10.391288587 -0500
@@ -24,7 +24,7 @@
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.LabelAndValue;
-import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.search.Filter;
/** Base class for range faceting.
*
@@ -36,17 +36,23 @@
/** Counts, initialized in by subclass. */
protected final int[] counts;
+ /** Optional: if specified, we first test this Filter to
+ * see whether the document should be checked for
+ * matching ranges. If this is null, all documents are
+ * checked. */
+ protected final Filter fastMatchFilter;
+
/** Our field name. */
protected final String field;
/** Total number of hits. */
protected int totCount;
- /** Create {@code RangeFacetCounts}, using {@link
- * LongFieldSource} from the specified field. */
- protected RangeFacetCounts(String field, Range[] ranges) throws IOException {
+ /** Create {@code RangeFacetCounts} */
+ protected RangeFacetCounts(String field, Range[] ranges, Filter fastMatchFilter) throws IOException {
this.field = field;
this.ranges = ranges;
+ this.fastMatchFilter = fastMatchFilter;
counts = new int[ranges.length];
}
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java lucene/facet/src/java/org/apache/lucene/facet/range/Range.java
--- ../trunk/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-04 05:41:41.975644157 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-30 11:06:51.131789879 -0500
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery; // javadocs
+
/** Base class for a single labeled range.
*
* @lucene.experimental */
@@ -33,6 +37,32 @@
this.label = label;
}
+ /** Returns a new {@link Filter} accepting only documents
+ * in this range. This filter is not general-purpose;
+ * you should either use it with {@link DrillSideways} by
+ * adding it to {@link DrillDownQuery#add}, or pass it to
+ * {@link FilteredQuery} using its {@link
+ * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
+ * {@link ValueSource} is static, e.g. an indexed numeric
+ * field, then it may be more efficient to use {@link
+ NumericRangeFilter}. The provided fastMatchFilter,
+ if non-null, is consulted first: only documents that
+ the fastMatchFilter accepts are then tested against
+ this range. */
+ public abstract Filter getFilter(Filter fastMatchFilter, ValueSource valueSource);
+
+ /** Returns a new {@link Filter} accepting only documents
+ * in this range. This filter is not general-purpose;
+ * you should either use it with {@link DrillSideways} by
+ * adding it to {@link DrillDownQuery#add}, or pass it to
+ * {@link FilteredQuery} using its {@link
+ * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
+ * {@link ValueSource} is static, e.g. an indexed numeric
+ * field, then it may be more efficient to use {@link NumericRangeFilter}. */
+ public Filter getFilter(ValueSource valueSource) {
+ return getFilter(null, valueSource);
+ }
+
/** Invoke this for a useless range. */
protected void failNoMatch() {
throw new IllegalArgumentException("range \"" + label + "\" matches nothing");
diff -ruN -x .svn -x build ../trunk/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java
--- ../trunk/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-06 06:25:30.358951320 -0500
+++ lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-30 11:43:25.919731165 -0500
@@ -30,6 +30,7 @@
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
+import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
@@ -39,10 +40,10 @@
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.MultiFacets;
-import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
@@ -50,12 +51,20 @@
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
+import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
-import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.queries.function.valuesource.LongFieldSource;
+import org.apache.lucene.search.CachingWrapperFilter;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
@@ -229,6 +238,10 @@
IndexSearcher s = newSearcher(r);
+ if (VERBOSE) {
+ System.out.println("TEST: searcher=" + s);
+ }
+
DrillSideways ds = new DrillSideways(s, config, tr) {
@Override
@@ -365,6 +378,8 @@
System.out.println("TEST: numDocs=" + numDocs);
}
long[] values = new long[numDocs];
+ long minValue = Long.MAX_VALUE;
+ long maxValue = Long.MIN_VALUE;
for(int i=0;i 0 && random().nextInt(10) == 7) {
@@ -447,13 +466,26 @@
}
if (accept) {
expectedCounts[rangeID]++;
+ minAcceptedValue = Math.min(minAcceptedValue, values[i]);
+ maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}
FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
- Facets facets = new LongRangeFacetCounts("field", sfc, ranges);
+ Filter fastMatchFilter;
+ if (random().nextBoolean()) {
+ if (random().nextBoolean()) {
+ fastMatchFilter = NumericRangeFilter.newLongRange("field", minValue, maxValue, true, true);
+ } else {
+ fastMatchFilter = NumericRangeFilter.newLongRange("field", minAcceptedValue, maxAcceptedValue, true, true);
+ }
+ } else {
+ fastMatchFilter = null;
+ }
+ ValueSource vs = new LongFieldSource("field");
+ Facets facets = new LongRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID 0 && random().nextInt(10) == 7) {
@@ -693,13 +766,26 @@
}
if (accept) {
expectedCounts[rangeID]++;
+ minAcceptedValue = Math.min(minAcceptedValue, values[i]);
+ maxAcceptedValue = Math.max(maxAcceptedValue, values[i]);
}
}
}
FacetsCollector sfc = new FacetsCollector();
s.search(new MatchAllDocsQuery(), sfc);
- Facets facets = new DoubleRangeFacetCounts("field", sfc, ranges);
+ Filter fastMatchFilter;
+ if (random().nextBoolean()) {
+ if (random().nextBoolean()) {
+ fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minValue, maxValue, true, true);
+ } else {
+ fastMatchFilter = NumericRangeFilter.newDoubleRange("field", minAcceptedValue, maxAcceptedValue, true, true);
+ }
+ } else {
+ fastMatchFilter = null;
+ }
+ ValueSource vs = new DoubleFieldSource("field");
+ Facets facets = new DoubleRangeFacetCounts("field", vs, sfc, fastMatchFilter, ranges);
FacetResult result = facets.getTopChildren(10, "field");
assertEquals(numRange, result.labelValues.length);
for(int rangeID=0;rangeID