diff -ruN -x .svn -x build ../trunk2/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
--- ../trunk2/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2014-01-29 06:40:22.102528892 -0500
+++ lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2014-01-28 05:35:34.432944181 -0500
@@ -31,6 +31,13 @@
* are no docs that match. */
public abstract DocIdSetIterator iterator() throws IOException;
+ // TODO: somehow this class should express the cost of
+ // iteration vs the cost of random access Bits; for
+ // expensive Filters (e.g. distance < 1 km) we should use
+ // bits() after all other Query/Filters have matched, but
+ // this is the opposite of what bits() is for now
+ // (down-low filtering)
+
/** Optionally provides a {@link Bits} interface for random access
* to matching documents.
* @return {@code null}, if this {@code DocIdSet} does not support random access.
diff -ruN -x .svn -x build ../trunk2/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
--- ../trunk2/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2014-01-29 06:40:22.102528892 -0500
+++ lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2014-01-28 06:32:42.216852483 -0500
@@ -50,7 +50,7 @@
* @param query Query to be filtered, cannot be null.
* @param filter Filter to apply to query results, cannot be null.
*/
- public FilteredQuery (Query query, Filter filter) {
+ public FilteredQuery(Query query, Filter filter) {
this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
}
@@ -63,7 +63,7 @@
*
* @see FilterStrategy
*/
- public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) {
+ public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
if (query == null || filter == null)
throw new IllegalArgumentException("Query and filter cannot be null.");
if (strategy == null)
@@ -118,7 +118,9 @@
// return this query
@Override
- public Query getQuery() { return FilteredQuery.this; }
+ public Query getQuery() {
+ return FilteredQuery.this;
+ }
// return a filtering scorer
@Override
@@ -130,8 +132,8 @@
// this means the filter does not accept any documents.
return null;
}
+
return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet);
-
}
};
}
@@ -183,14 +185,12 @@
@Override
public int advance(int target) throws IOException {
-
int doc = scorer.advance(target);
if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) {
return scorerDoc = nextDoc();
} else {
return scorerDoc = doc;
}
-
}
@Override
@@ -303,7 +303,9 @@
}
@Override
- public final int freq() throws IOException { return scorer.freq(); }
+ public final int freq() throws IOException {
+ return scorer.freq();
+ }
@Override
public final Collection getChildren() {
@@ -527,7 +529,7 @@
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
- final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
+ final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc);
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
diff -ruN -x .svn -x build ../trunk2/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java
--- ../trunk2/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-29 06:40:22.070528892 -0500
+++ lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-28 06:31:33.784854310 -0500
@@ -29,16 +29,18 @@
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.facet.DrillDownQuery;
+import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.SortField;
@@ -59,6 +61,7 @@
private final Directory indexDir = new RAMDirectory();
private IndexSearcher searcher;
+ private final FacetsConfig config = new FacetsConfig();
/** Empty constructor */
public DistanceFacetsExample() {}
@@ -92,7 +95,8 @@
private ValueSource getDistanceValueSource() {
Expression distance;
try {
- distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)");
+ distance = JavascriptCompiler.compile(
+ "haversin(40.7143528,-74.0059731,latitude,longitude)");
} catch (ParseException pe) {
// Should not happen
throw new RuntimeException(pe);
@@ -107,7 +111,6 @@
/** User runs a query and counts facets. */
public FacetResult search() throws IOException {
-
FacetsCollector fc = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), fc);
@@ -127,10 +130,16 @@
// Passing no baseQuery means we drill down on all
// documents ("browse only"):
DrillDownQuery q = new DrillDownQuery(null);
-
- q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource())));
-
- return searcher.search(q, 10);
+ final ValueSource vs = getDistanceValueSource();
+ q.add("field", range.getFilter(vs));
+ DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
+ @Override
+ protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
+ assert drillSideways.length == 1;
+ return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
+ }
+ };
+ return ds.search(q, 10).hits;
}
@Override
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-29 06:40:22.154528890 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-28 06:31:59.728853622 -0500
@@ -18,22 +18,20 @@
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
-import org.apache.lucene.facet.range.LongRangeFacetCounts;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -86,7 +84,7 @@
/** Used by DrillSideways */
DrillDownQuery(FacetsConfig config, Query baseQuery, List clauses, Map drillDownDims) {
- this.query = new BooleanQuery(true);
+ query = new BooleanQuery(true);
if (baseQuery != null) {
query.add(baseQuery, Occur.MUST);
}
@@ -155,11 +153,12 @@
/** Expert: add a custom drill-down subQuery. Use this
* when you have a separate way to drill-down on the
- * dimension than the indexed facet ordinals (for
- * example, use a {@link NumericRangeQuery} to drill down
- * after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */
+ * dimension than the indexed facet ordinals. */
public void add(String dim, Query subQuery) {
+ if (drillDownDims.containsKey(dim)) {
+ throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
+ }
// TODO: we should use FilteredQuery?
// So scores of the drill-down query don't have an
@@ -172,6 +171,40 @@
drillDownDims.put(dim, drillDownDims.size());
}
+  /** Expert: drill down on {@code dim} using a custom {@link Filter},
+   *  e.g. when drilling down after range faceting. */
+  public void add(String dim, Filter subFilter) {
+    // Each dimension may carry at most one drill-down:
+    final boolean alreadyPresent = drillDownDims.containsKey(dim);
+    if (alreadyPresent) {
+      throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down");
+    }
+
+    // TODO: we should use FilteredQuery?
+
+    // Wrap the filter as a zero-boost constant-score clause, so the
+    // drill-down has no effect on the scores:
+    final ConstantScoreQuery zeroScoreClause = new ConstantScoreQuery(subFilter);
+    zeroScoreClause.setBoost(0.0f);
+    query.add(zeroScoreClause, Occur.MUST);
+
+    drillDownDims.put(dim, drillDownDims.size());
+  }
+
+  static Filter getFilter(Query query) {
+    // Peel nested ConstantScoreQuery wrappers until one of them holds a Filter:
+    Query current = query;
+    while (current instanceof ConstantScoreQuery) {
+      final ConstantScoreQuery wrapper = (ConstantScoreQuery) current;
+      final Filter wrapped = wrapper.getFilter();
+      if (wrapped != null) {
+        return wrapped;
+      }
+      current = wrapper.getQuery();
+    }
+    return null; // innermost query is not a ConstantScoreQuery
+  }
+
@Override
public DrillDownQuery clone() {
return new DrillDownQuery(config, query, drillDownDims);
@@ -199,7 +232,63 @@
if (query.clauses().size() == 0) {
return new MatchAllDocsQuery();
}
- return query;
+
+ List filters = new ArrayList();
+ List queries = new ArrayList();
+ List clauses = query.clauses();
+ Query baseQuery;
+ int startIndex;
+ if (drillDownDims.size() == query.clauses().size()) {
+ baseQuery = new MatchAllDocsQuery();
+ startIndex = 0;
+ } else {
+ baseQuery = clauses.get(0).getQuery();
+ startIndex = 1;
+ }
+
+ for(int i=startIndex;i weightToIndex = new IdentityHashMap();
-
- private Scorer mainScorer;
-
- public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors,
- Map dims) {
- this.hitCollector = hitCollector;
- this.drillDownCollector = drillDownCollector;
- this.drillSidewaysCollectors = drillSidewaysCollectors;
- subScorers = new Scorer[dims.size()];
-
- if (dims.size() == 1) {
- // When we have only one dim, we insert the
- // MatchAllDocsQuery, bringing the clause count to
- // 2:
- exactCount = 2;
- } else {
- exactCount = dims.size();
- }
- }
-
- @Override
- public void collect(int doc) throws IOException {
- //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount);
-
- if (mainScorer == null) {
- // This segment did not have any docs with any
- // drill-down field & value:
- return;
- }
-
- if (mainScorer.freq() == exactCount) {
- // All sub-clauses from the drill-down filters
- // matched, so this is a "real" hit, so we first
- // collect in both the hitCollector and the
- // drillDown collector:
- //System.out.println(" hit " + drillDownCollector);
- hitCollector.collect(doc);
- if (drillDownCollector != null) {
- drillDownCollector.collect(doc);
- }
-
- // Also collect across all drill-sideways counts so
- // we "merge in" drill-down counts for this
- // dimension.
- for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc;
- drillSidewaysCollectors[i].collect(doc);
- assert allMatchesFrom(i+1, doc);
- found = true;
- break;
- }
- }
- assert found;
- }
- }
-
- // Only used by assert:
- private boolean allMatchesFrom(int startFrom, int doc) {
- for(int i=startFrom;i drillDownDims = ddq.getDims();
-
- BooleanQuery topQuery = new BooleanQuery(true);
- final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
- drillDownDims);
-
- // TODO: if query is already a BQ we could copy that and
- // add clauses to it, instead of doing BQ inside BQ
- // (should be more efficient)? Problem is this can
- // affect scoring (coord) ... too bad we can't disable
- // coord on a clause by clause basis:
- topQuery.add(baseQuery, BooleanClause.Occur.MUST);
-
- // NOTE: in theory we could just make a single BQ, with
- // +query a b c minShouldMatch=2, but in this case,
- // annoyingly, BS2 wraps a sub-scorer that always
- // returns 2 as the .freq(), not how many of the
- // SHOULD clauses matched:
- BooleanQuery subQuery = new BooleanQuery(true);
-
- Query wrappedSubQuery = new QueryWrapper(subQuery,
- new SetWeight() {
- @Override
- public void set(Weight w) {
- collector.setWeight(w, -1);
- }
- });
- Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
-
- // Don't impact score of original query:
- constantScoreSubQuery.setBoost(0.0f);
-
- topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
-
- // Unfortunately this sub-BooleanQuery
- // will never get BS1 because today BS1 only works
- // if topScorer=true... and actually we cannot use BS1
- // anyways because we need subDocsScoredAtOnce:
- int dimIndex = 0;
+ Query[] drillDownQueries = new Query[clauses.length-startClause];
for(int i=startClause;i 1 || (nullCount == 1 && dims.length == 1)) {
+ // If more than one dim has no matches, then there
+ // are no hits nor drill-sideways counts. Or, if we
+ // have only one dim and that dim has no matches,
+ // same thing.
+ //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
+ if (nullCount > 1) {
return null;
}
// Sort drill-downs by most restrictive first:
Arrays.sort(dims);
- // TODO: it could be better if we take acceptDocs
- // into account instead of baseScorer?
- Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
-
if (baseScorer == null) {
return null;
}
return new DrillSidewaysScorer(this, context,
- baseScorer,
- drillDownCollector, dims);
+ baseScorer,
+ drillDownCollector, dims,
+ scoreSubDocsAtOnce);
}
};
}
@@ -174,7 +204,7 @@
result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
result = prime * result
+ ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
- result = prime * result + Arrays.hashCode(drillDownTerms);
+ result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
return result;
}
@@ -191,7 +221,7 @@
if (drillDownCollector == null) {
if (other.drillDownCollector != null) return false;
} else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
- if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+ if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false;
if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
return true;
}
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-29 06:40:22.146528890 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-29 06:37:36.850533308 -0500
@@ -22,10 +22,11 @@
import java.util.Collections;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
class DrillSidewaysScorer extends Scorer {
@@ -34,13 +35,15 @@
private final Collector drillDownCollector;
- private final DocsEnumsAndFreq[] dims;
+ private final DocsAndCost[] dims;
// DrillDown DocsEnums:
private final Scorer baseScorer;
private final AtomicReaderContext context;
+ final boolean scoreSubDocsAtOnce;
+
private static final int CHUNK = 2048;
private static final int MASK = CHUNK-1;
@@ -48,12 +51,13 @@
private float collectScore;
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
- DocsEnumsAndFreq[] dims) {
+ DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
super(w);
this.dims = dims;
this.context = context;
this.baseScorer = baseScorer;
this.drillDownCollector = drillDownCollector;
+ this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@@ -67,7 +71,7 @@
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
}
- for(DocsEnumsAndFreq dim : dims) {
+ for (DocsAndCost dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
}
@@ -79,26 +83,38 @@
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
- for(DocsEnumsAndFreq dim : dims) {
- for (DocsEnum docsEnum : dim.docsEnums) {
- if (docsEnum != null) {
- docsEnum.nextDoc();
- }
+ int numBits = 0;
+ for (DocsAndCost dim : dims) {
+ if (dim.disi != null) {
+ dim.disi.nextDoc();
+ } else if (dim.bits != null) {
+ numBits++;
}
}
final int numDims = dims.length;
- DocsEnum[][] docsEnums = new DocsEnum[numDims][];
- Collector[] sidewaysCollectors = new Collector[numDims];
+ Bits[] bits = new Bits[numBits];
+ Collector[] bitsSidewaysCollectors = new Collector[numBits];
+
+ DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits];
+ Collector[] sidewaysCollectors = new Collector[numDims-numBits];
long drillDownCost = 0;
- for(int dim=0;dim 1 && (dims[1].maxCost < baseQueryCost/10)) {
+ if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) {
+ //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length);
+ doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors);
+ } else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) {
//System.out.println("drillDownAdvance");
- doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+ doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
} else {
//System.out.println("union");
- doUnionScoring(collector, docsEnums, sidewaysCollectors);
+ doUnionScoring(collector, disis, sidewaysCollectors);
+ }
+ }
+
+ /** Used when base query is highly constraining vs the
+ * drilldowns, or when the docs must be scored at once
+ * (i.e., like BooleanScorer2, not BooleanScorer). In
+ * this case we just .next() on base and .advance() on
+ * the dim filters. */
+ private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors,
+ Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException {
+ //if (DEBUG) {
+ // System.out.println(" doQueryFirstScoring");
+ //}
+ int docID = baseScorer.docID();
+
+ nextDoc: while (docID != NO_MORE_DOCS) {
+ Collector failedCollector = null;
+ for (int i=0;i docID) {
+ if (failedCollector != null) {
+ // More than one dim fails on this document, so
+ // it's neither a hit nor a near-miss; move to
+ // next doc:
+ docID = baseScorer.nextDoc();
+ continue nextDoc;
+ } else {
+ failedCollector = sidewaysCollectors[i];
+ }
+ }
+ }
+
+ // TODO: for the "non-costly Bits" we really should
+ // have passed them down as acceptDocs, but
+ // unfortunately we cannot distinguish today between
+ // "bits() is so costly that you should apply it last"
+ // from "bits() is so cheap that you should apply it
+ // everywhere down low"
+
+ // Fold in Filter Bits last, since they may be costly:
+ for(int i=0;i= dim) {
@@ -299,8 +383,9 @@
counts[slot] = dim+1;
}
}
+
// TODO: sometimes use advance?
- docID = docsEnum.nextDoc();
+ docID = disi.nextDoc();
}
}
}
@@ -309,7 +394,7 @@
//if (DEBUG) {
// System.out.println(" now collect: " + filledCount + " hits");
//}
- for(int i=0;i {
- DocsEnum[] docsEnums;
- // Max cost for all docsEnums for this dim:
- long maxCost;
+ static class DocsAndCost implements Comparable {
+ // Iterator for docs matching this dim's filter, or ...
+ DocIdSetIterator disi;
+ // Random access bits:
+ Bits bits;
Collector sidewaysCollector;
String dim;
@Override
- public int compareTo(DocsEnumsAndFreq other) {
- if (maxCost < other.maxCost) {
+ public int compareTo(DocsAndCost other) {
+ if (disi == null) {
+ if (other.disi == null) {
+ return 0;
+ } else {
+ return 1;
+ }
+ } else if (other.disi == null) {
+ return -1;
+ } else if (disi.cost() < other.disi.cost()) {
return -1;
- } else if (maxCost > other.maxCost) {
+ } else if (disi.cost() > other.disi.cost()) {
return 1;
} else {
return 0;
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-29 06:40:22.150528890 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-28 06:25:14.868864446 -0500
@@ -26,7 +26,6 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.NumericUtils;
@@ -99,14 +98,15 @@
return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
}
- /** Returns a new {@link Filter} accepting only documents
- * in this range. Note that this filter is not
- * efficient: it's a linear scan of all docs, testing
- * each value. If the {@link ValueSource} is static,
- * e.g. an indexed numeric field, then it's more
- * efficient to use {@link NumericRangeFilter}. */
+ @Override
public Filter getFilter(final ValueSource valueSource) {
return new Filter() {
+
+      @Override
+      public String toString() {
+        return "Filter(" + DoubleRange.this.toString() + ")"; // e.g. Filter(DoubleRange(0.0 to 1.0))
+      }
+
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@@ -122,46 +122,27 @@
return new DocIdSet() {
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- int doc = -1;
-
+ public Bits bits() {
+ return new Bits() {
@Override
- public int nextDoc() throws IOException {
- while (true) {
- doc++;
- if (doc == maxDoc) {
- return doc = NO_MORE_DOCS;
- }
- if (acceptDocs != null && acceptDocs.get(doc) == false) {
- continue;
- }
- double v = values.doubleVal(doc);
- if (accept(v)) {
- return doc;
- }
+ public boolean get(int docID) {
+ if (acceptDocs != null && acceptDocs.get(docID) == false) {
+ return false;
}
+ return accept(values.doubleVal(docID));
}
@Override
- public int advance(int target) throws IOException {
- doc = target-1;
- return nextDoc();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public long cost() {
- // Since we do a linear scan over all
- // documents, our cost is O(maxDoc):
+ public int length() {
return maxDoc;
}
};
}
+
+ @Override
+ public DocIdSetIterator iterator() {
+ return new SlowBitsDocIdSetIterator(bits(), maxDoc, acceptDocs);
+ }
};
}
};
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-29 06:40:22.150528890 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-28 06:31:20.560854668 -0500
@@ -26,7 +26,6 @@
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.util.Bits;
/** Represents a range over long values. */
@@ -91,14 +90,15 @@
return "LongRange(" + minIncl + " to " + maxIncl + ")";
}
- /** Returns a new {@link Filter} accepting only documents
- * in this range. Note that this filter is not
- * efficient: it's a linear scan of all docs, testing
- * each value. If the {@link ValueSource} is static,
- * e.g. an indexed numeric field, then it's more
- * efficient to use {@link NumericRangeFilter}. */
+ @Override
public Filter getFilter(final ValueSource valueSource) {
return new Filter() {
+
+      @Override
+      public String toString() {
+        return "Filter(" + LongRange.this.toString() + ")"; // e.g. Filter(LongRange(0 to 10))
+      }
+
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
@@ -114,46 +114,27 @@
return new DocIdSet() {
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- int doc = -1;
-
+ public Bits bits() {
+ return new Bits() {
@Override
- public int nextDoc() throws IOException {
- while (true) {
- doc++;
- if (doc == maxDoc) {
- return doc = NO_MORE_DOCS;
- }
- if (acceptDocs != null && acceptDocs.get(doc) == false) {
- continue;
- }
- long v = values.longVal(doc);
- if (accept(v)) {
- return doc;
- }
+ public boolean get(int docID) {
+ if (acceptDocs != null && acceptDocs.get(docID) == false) {
+ return false;
}
+ return accept(values.longVal(docID));
}
@Override
- public int advance(int target) throws IOException {
- doc = target-1;
- return nextDoc();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public long cost() {
- // Since we do a linear scan over all
- // documents, our cost is O(maxDoc):
+ public int length() {
return maxDoc;
}
};
}
+
+ @Override
+ public DocIdSetIterator iterator() {
+ return new SlowBitsDocIdSetIterator(bits(), maxDoc, acceptDocs);
+ }
};
}
};
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java lucene/facet/src/java/org/apache/lucene/facet/range/Range.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-29 06:40:22.150528890 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-28 05:45:09.260928795 -0500
@@ -17,6 +17,10 @@
* limitations under the License.
*/
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery; // javadocs
+
/** Base class for a single labeled range.
*
* @lucene.experimental */
@@ -33,6 +37,16 @@
this.label = label;
}
+ /** Returns a new {@link Filter} accepting only documents
+ * in this range. This filter is not general-purpose;
+ * you should either use it with {@link org.apache.lucene.facet.DrillSideways} by
+ * adding it to {@link org.apache.lucene.facet.DrillDownQuery#add(String,Filter)}, or pass it to
+ * {@link FilteredQuery} using its {@link
+ * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the
+ * {@link ValueSource} is static, e.g. an indexed numeric
+ * field, then it may be more efficient to use {@link org.apache.lucene.search.NumericRangeFilter}. */
+ public abstract Filter getFilter(final ValueSource valueSource);
+
/** Invoke this for a useless range. */
protected void failNoMatch() {
throw new IllegalArgumentException("range \"" + label + "\" matches nothing");
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java
--- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java 1969-12-31 19:00:00.000000000 -0500
+++ lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java 2014-01-28 06:26:30.508862423 -0500
@@ -0,0 +1,75 @@
+package org.apache.lucene.facet.range;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+
+/** Wraps a {@link Bits} to make a {@link DocIdSetIterator} by testing every
+ *  docID below {@code bits.length()} in turn; this is typically a slow thing
+ *  to do and should only be used as a last resort! */
+class SlowBitsDocIdSetIterator extends DocIdSetIterator {
+  private final Bits bits;        // membership test applied to each candidate docID
+  private final long cost;        // caller-supplied value handed back by cost()
+  private final int maxDoc;       // exclusive upper bound on docIDs: bits.length()
+  private final Bits acceptDocs;  // optional extra restriction; null accepts every doc
+  private int doc = -1;           // current docID; -1 until the iterator is first moved
+
+  public SlowBitsDocIdSetIterator(Bits bits, long cost, Bits acceptDocs) {
+    this.bits = bits;
+    this.cost = cost;
+    this.maxDoc = bits.length();
+    this.acceptDocs = acceptDocs;
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    if (target >= maxDoc) {  // NO_MORE_DOCS, or any other target past the last docID
+      doc = NO_MORE_DOCS;
+      return doc;
+    }
+    doc = target-1;
+    return nextDoc();
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    assert doc != NO_MORE_DOCS;
+    while (true) {
+      doc++;
+      if (doc >= maxDoc) {  // >= so the Bits are never probed outside [0, maxDoc)
+        return doc = NO_MORE_DOCS;
+      }
+      if ((acceptDocs == null || acceptDocs.get(doc)) && bits.get(doc)) {
+        return doc;
+      }
+    }
+  }
+
+  @Override
+  public int docID() {
+    return doc;
+  }
+
+  @Override
+  public long cost() {
+    return cost;
+  }
+}
\ No newline at end of file
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java
--- ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-29 06:40:22.146528890 -0500
+++ lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-29 06:37:53.082532873 -0500
@@ -51,7 +51,6 @@
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
-import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
@@ -229,6 +228,10 @@
IndexSearcher s = newSearcher(r);
+ if (VERBOSE) {
+ System.out.println("TEST: searcher=" + s);
+ }
+
DrillSideways ds = new DrillSideways(s, config, tr) {
@Override
@@ -765,16 +768,13 @@
Document doc = new Document();
writer.addDocument(doc);
-
- doc = new Document();
writer.addDocument(doc);
-
- doc = new Document();
writer.addDocument(doc);
+ // Test wants 3 docs in one segment:
writer.forceMerge(1);
- ValueSource vs = new ValueSource() {
+ final ValueSource vs = new ValueSource() {
@SuppressWarnings("rawtypes")
@Override
public FunctionValues getValues(Map ignored, AtomicReaderContext ignored2) {
@@ -801,6 +801,8 @@
throw new UnsupportedOperationException();
}
};
+
+ FacetsConfig config = new FacetsConfig();
FacetsCollector fc = new FacetsCollector();
@@ -808,18 +810,44 @@
IndexSearcher s = newSearcher(r);
s.search(new MatchAllDocsQuery(), fc);
- Facets facets = new DoubleRangeFacetCounts("field", vs, fc,
+ final DoubleRange[] ranges = new DoubleRange[] {
new DoubleRange("< 1", 0.0, true, 1.0, false),
new DoubleRange("< 2", 0.0, true, 2.0, false),
new DoubleRange("< 5", 0.0, true, 5.0, false),
new DoubleRange("< 10", 0.0, true, 10.0, false),
new DoubleRange("< 20", 0.0, true, 20.0, false),
- new DoubleRange("< 50", 0.0, true, 50.0, false));
+ new DoubleRange("< 50", 0.0, true, 50.0, false)};
+
+ Facets facets = new DoubleRangeFacetCounts("field", vs, fc, ranges);
assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.getTopChildren(10, "field").toString());
- // Test drill-down:
- assertEquals(1, s.search(new ConstantScoreQuery(new DoubleRange("< 2", 0.0, true, 2.0, false).getFilter(vs)), 10).totalHits);
+ DrillDownQuery ddq = new DrillDownQuery(config);
+ ddq.add("field", ranges[1].getFilter(vs));
+
+ // Test simple drill-down:
+ assertEquals(1, s.search(ddq, 10).totalHits);
+
+ // Test drill-sideways after drill-down
+ DrillSideways ds = new DrillSideways(s, config, (TaxonomyReader) null) {
+
+ @Override
+ protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {
+ assert drillSideways.length == 1;
+ return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ranges);
+ }
+
+ @Override
+ protected boolean scoreSubDocsAtOnce() {
+ return random().nextBoolean();
+ }
+ };
+
+
+ DrillSidewaysResult dsr = ds.search(ddq, 10);
+ assertEquals(1, dsr.hits.totalHits);
+ assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n",
+ dsr.facets.getTopChildren(10, "field").toString());
IOUtils.close(r, writer, dir);
}
diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
--- ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java 2014-01-29 06:40:22.146528890 -0500
+++ lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java 2014-01-27 05:25:54.775270973 -0500
@@ -644,7 +644,7 @@
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID < maxDoc;docID++) {
// Keeps only the even ids:
- if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) {
+ if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) {
bits.set(docID);
}
}
@@ -688,7 +688,7 @@
// subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
- // drill-down values, beacuse in that case it's
+ // drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
new DrillSideways(s, config, tr) {