diff -ruN -x .svn -x build ../trunk2/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java lucene/core/src/java/org/apache/lucene/search/DocIdSet.java --- ../trunk2/lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2014-01-29 06:40:22.102528892 -0500 +++ lucene/core/src/java/org/apache/lucene/search/DocIdSet.java 2014-01-28 05:35:34.432944181 -0500 @@ -31,6 +31,13 @@ * are no docs that match. */ public abstract DocIdSetIterator iterator() throws IOException; + // TODO: somehow this class should express the cost of + // iteration vs the cost of random access Bits; for + // expensive Filters (e.g. distance < 1 km) we should use + // bits() after all other Query/Filters have matched, but + // this is the opposite of what bits() is for now + // (down-low filtering) + /** Optionally provides a {@link Bits} interface for random access * to matching documents. * @return {@code null}, if this {@code DocIdSet} does not support random access. diff -ruN -x .svn -x build ../trunk2/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java --- ../trunk2/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2014-01-29 06:40:22.102528892 -0500 +++ lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java 2014-01-28 06:32:42.216852483 -0500 @@ -50,7 +50,7 @@ * @param query Query to be filtered, cannot be null. * @param filter Filter to apply to query results, cannot be null. 
*/ - public FilteredQuery (Query query, Filter filter) { + public FilteredQuery(Query query, Filter filter) { this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY); } @@ -63,7 +63,7 @@ * * @see FilterStrategy */ - public FilteredQuery (Query query, Filter filter, FilterStrategy strategy) { + public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) { if (query == null || filter == null) throw new IllegalArgumentException("Query and filter cannot be null."); if (strategy == null) @@ -118,7 +118,9 @@ // return this query @Override - public Query getQuery() { return FilteredQuery.this; } + public Query getQuery() { + return FilteredQuery.this; + } // return a filtering scorer @Override @@ -130,8 +132,8 @@ // this means the filter does not accept any documents. return null; } + return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet); - } }; } @@ -183,14 +185,12 @@ @Override public int advance(int target) throws IOException { - int doc = scorer.advance(target); if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) { return scorerDoc = nextDoc(); } else { return scorerDoc = doc; } - } @Override @@ -303,7 +303,9 @@ } @Override - public final int freq() throws IOException { return scorer.freq(); } + public final int freq() throws IOException { + return scorer.freq(); + } @Override public final Collection getChildren() { @@ -527,7 +529,7 @@ final Bits filterAcceptDocs = docIdSet.bits(); // force if RA is requested - final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc))); + final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc); if (useRandomAccess) { // if we are using random access, we return the inner scorer, just with other acceptDocs return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs); diff -ruN -x .svn -x build 
../trunk2/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java --- ../trunk2/lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-29 06:40:22.070528892 -0500 +++ lucene/demo/src/java/org/apache/lucene/demo/facet/DistanceFacetsExample.java 2014-01-28 06:31:33.784854310 -0500 @@ -29,16 +29,18 @@ import org.apache.lucene.expressions.SimpleBindings; import org.apache.lucene.expressions.js.JavascriptCompiler; import org.apache.lucene.facet.DrillDownQuery; +import org.apache.lucene.facet.DrillSideways; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.Facets; import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.range.DoubleRange; import org.apache.lucene.facet.range.DoubleRangeFacetCounts; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queries.function.ValueSource; -import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.SortField; @@ -59,6 +61,7 @@ private final Directory indexDir = new RAMDirectory(); private IndexSearcher searcher; + private final FacetsConfig config = new FacetsConfig(); /** Empty constructor */ public DistanceFacetsExample() {} @@ -92,7 +95,8 @@ private ValueSource getDistanceValueSource() { Expression distance; try { - distance = JavascriptCompiler.compile("haversin(40.7143528,-74.0059731,latitude,longitude)"); + distance = JavascriptCompiler.compile( + "haversin(40.7143528,-74.0059731,latitude,longitude)"); } catch (ParseException pe) { // Should not happen throw new RuntimeException(pe); @@ -107,7 +111,6 @@ /** User runs a 
query and counts facets. */ public FacetResult search() throws IOException { - FacetsCollector fc = new FacetsCollector(); searcher.search(new MatchAllDocsQuery(), fc); @@ -127,10 +130,16 @@ // Passing no baseQuery means we drill down on all // documents ("browse only"): DrillDownQuery q = new DrillDownQuery(null); - - q.add("field", new ConstantScoreQuery(range.getFilter(getDistanceValueSource()))); - - return searcher.search(q, 10); + final ValueSource vs = getDistanceValueSource(); + q.add("field", range.getFilter(vs)); + DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) { + @Override + protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { + assert drillSideways.length == 1; + return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM); + } + }; + return ds.search(q, 10).hits; } @Override diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-29 06:40:22.154528890 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/DrillDownQuery.java 2014-01-28 06:31:59.728853622 -0500 @@ -18,22 +18,20 @@ */ import java.io.IOException; +import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.facet.range.DoubleRangeFacetCounts; -import org.apache.lucene.facet.range.LongRangeFacetCounts; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Filter; 
import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -86,7 +84,7 @@ /** Used by DrillSideways */ DrillDownQuery(FacetsConfig config, Query baseQuery, List clauses, Map drillDownDims) { - this.query = new BooleanQuery(true); + query = new BooleanQuery(true); if (baseQuery != null) { query.add(baseQuery, Occur.MUST); } @@ -155,11 +153,12 @@ /** Expert: add a custom drill-down subQuery. Use this * when you have a separate way to drill-down on the - * dimension than the indexed facet ordinals (for - * example, use a {@link NumericRangeQuery} to drill down - * after {@link LongRangeFacetCounts} or {@link DoubleRangeFacetCounts}. */ + * dimension than the indexed facet ordinals. */ public void add(String dim, Query subQuery) { + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down"); + } // TODO: we should use FilteredQuery? // So scores of the drill-down query don't have an @@ -172,6 +171,40 @@ drillDownDims.put(dim, drillDownDims.size()); } + /** Expert: add a custom drill-down Filter, e.g. when + * drilling down after range faceting. */ + public void add(String dim, Filter subFilter) { + + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension \"" + dim + "\" already has a drill-down"); + } + + // TODO: we should use FilteredQuery? 
+ + // So scores of the drill-down query don't have an + // effect: + final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subFilter); + drillDownQuery.setBoost(0.0f); + + query.add(drillDownQuery, Occur.MUST); + + drillDownDims.put(dim, drillDownDims.size()); + } + + static Filter getFilter(Query query) { + if (query instanceof ConstantScoreQuery) { + ConstantScoreQuery csq = (ConstantScoreQuery) query; + Filter filter = csq.getFilter(); + if (filter != null) { + return filter; + } else { + return getFilter(csq.getQuery()); + } + } else { + return null; + } + } + @Override public DrillDownQuery clone() { return new DrillDownQuery(config, query, drillDownDims); @@ -199,7 +232,63 @@ if (query.clauses().size() == 0) { return new MatchAllDocsQuery(); } - return query; + + List filters = new ArrayList(); + List queries = new ArrayList(); + List clauses = query.clauses(); + Query baseQuery; + int startIndex; + if (drillDownDims.size() == query.clauses().size()) { + baseQuery = new MatchAllDocsQuery(); + startIndex = 0; + } else { + baseQuery = clauses.get(0).getQuery(); + startIndex = 1; + } + + for(int i=startIndex;i weightToIndex = new IdentityHashMap(); - - private Scorer mainScorer; - - public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors, - Map dims) { - this.hitCollector = hitCollector; - this.drillDownCollector = drillDownCollector; - this.drillSidewaysCollectors = drillSidewaysCollectors; - subScorers = new Scorer[dims.size()]; - - if (dims.size() == 1) { - // When we have only one dim, we insert the - // MatchAllDocsQuery, bringing the clause count to - // 2: - exactCount = 2; - } else { - exactCount = dims.size(); - } - } - - @Override - public void collect(int doc) throws IOException { - //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount); - - if (mainScorer == null) { - // This segment did 
not have any docs with any - // drill-down field & value: - return; - } - - if (mainScorer.freq() == exactCount) { - // All sub-clauses from the drill-down filters - // matched, so this is a "real" hit, so we first - // collect in both the hitCollector and the - // drillDown collector: - //System.out.println(" hit " + drillDownCollector); - hitCollector.collect(doc); - if (drillDownCollector != null) { - drillDownCollector.collect(doc); - } - - // Also collect across all drill-sideways counts so - // we "merge in" drill-down counts for this - // dimension. - for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc; - drillSidewaysCollectors[i].collect(doc); - assert allMatchesFrom(i+1, doc); - found = true; - break; - } - } - assert found; - } - } - - // Only used by assert: - private boolean allMatchesFrom(int startFrom, int doc) { - for(int i=startFrom;i drillDownDims = ddq.getDims(); - - BooleanQuery topQuery = new BooleanQuery(true); - final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors, - drillDownDims); - - // TODO: if query is already a BQ we could copy that and - // add clauses to it, instead of doing BQ inside BQ - // (should be more efficient)? Problem is this can - // affect scoring (coord) ... 
too bad we can't disable - // coord on a clause by clause basis: - topQuery.add(baseQuery, BooleanClause.Occur.MUST); - - // NOTE: in theory we could just make a single BQ, with - // +query a b c minShouldMatch=2, but in this case, - // annoyingly, BS2 wraps a sub-scorer that always - // returns 2 as the .freq(), not how many of the - // SHOULD clauses matched: - BooleanQuery subQuery = new BooleanQuery(true); - - Query wrappedSubQuery = new QueryWrapper(subQuery, - new SetWeight() { - @Override - public void set(Weight w) { - collector.setWeight(w, -1); - } - }); - Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery); - - // Don't impact score of original query: - constantScoreSubQuery.setBoost(0.0f); - - topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST); - - // Unfortunately this sub-BooleanQuery - // will never get BS1 because today BS1 only works - // if topScorer=true... and actually we cannot use BS1 - // anyways because we need subDocsScoredAtOnce: - int dimIndex = 0; + Query[] drillDownQueries = new Query[clauses.length-startClause]; for(int i=startClause;i 1 || (nullCount == 1 && dims.length == 1)) { + // If more than one dim has no matches, then there + // are no hits nor drill-sideways counts. Or, if we + // have only one dim and that dim has no matches, + // same thing. + //if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) { + if (nullCount > 1) { return null; } // Sort drill-downs by most restrictive first: Arrays.sort(dims); - // TODO: it could be better if we take acceptDocs - // into account instead of baseScorer? - Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); - if (baseScorer == null) { return null; } return new DrillSidewaysScorer(this, context, - baseScorer, - drillDownCollector, dims); + baseScorer, + drillDownCollector, dims, + scoreSubDocsAtOnce); } }; } @@ -174,7 +204,7 @@ result = prime * result + ((baseQuery == null) ? 
0 : baseQuery.hashCode()); result = prime * result + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode()); - result = prime * result + Arrays.hashCode(drillDownTerms); + result = prime * result + Arrays.hashCode(drillDownQueries); result = prime * result + Arrays.hashCode(drillSidewaysCollectors); return result; } @@ -191,7 +221,7 @@ if (drillDownCollector == null) { if (other.drillDownCollector != null) return false; } else if (!drillDownCollector.equals(other.drillDownCollector)) return false; - if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false; + if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false; if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false; return true; } diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-29 06:40:22.146528890 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java 2014-01-29 06:37:36.850533308 -0500 @@ -22,10 +22,11 @@ import java.util.Collections; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; class DrillSidewaysScorer extends Scorer { @@ -34,13 +35,15 @@ private final Collector drillDownCollector; - private final DocsEnumsAndFreq[] dims; + private final DocsAndCost[] dims; // DrillDown DocsEnums: private final Scorer baseScorer; private final AtomicReaderContext context; + final boolean scoreSubDocsAtOnce; + private static final int CHUNK = 2048; private static final int MASK = CHUNK-1; @@ -48,12 +51,13 
@@ private float collectScore; DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector, - DocsEnumsAndFreq[] dims) { + DocsAndCost[] dims, boolean scoreSubDocsAtOnce) { super(w); this.dims = dims; this.context = context; this.baseScorer = baseScorer; this.drillDownCollector = drillDownCollector; + this.scoreSubDocsAtOnce = scoreSubDocsAtOnce; } @Override @@ -67,7 +71,7 @@ drillDownCollector.setScorer(this); drillDownCollector.setNextReader(context); } - for(DocsEnumsAndFreq dim : dims) { + for (DocsAndCost dim : dims) { dim.sidewaysCollector.setScorer(this); dim.sidewaysCollector.setNextReader(context); } @@ -79,26 +83,38 @@ // Position all scorers to their first matching doc: baseScorer.nextDoc(); - for(DocsEnumsAndFreq dim : dims) { - for (DocsEnum docsEnum : dim.docsEnums) { - if (docsEnum != null) { - docsEnum.nextDoc(); - } + int numBits = 0; + for (DocsAndCost dim : dims) { + if (dim.disi != null) { + dim.disi.nextDoc(); + } else if (dim.bits != null) { + numBits++; } } final int numDims = dims.length; - DocsEnum[][] docsEnums = new DocsEnum[numDims][]; - Collector[] sidewaysCollectors = new Collector[numDims]; + Bits[] bits = new Bits[numBits]; + Collector[] bitsSidewaysCollectors = new Collector[numBits]; + + DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits]; + Collector[] sidewaysCollectors = new Collector[numDims-numBits]; long drillDownCost = 0; - for(int dim=0;dim 1 && (dims[1].maxCost < baseQueryCost/10)) { + if (bitsUpto > 0 || scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) { + //System.out.println("queryFirst: baseScorer=" + baseScorer + " disis.length=" + disis.length + " bits.length=" + bits.length); + doQueryFirstScoring(collector, disis, sidewaysCollectors, bits, bitsSidewaysCollectors); + } else if (numDims > 1 && (dims[1].disi == null || dims[1].disi.cost() < baseQueryCost/10)) { //System.out.println("drillDownAdvance"); - doDrillDownAdvanceScoring(collector, docsEnums, 
sidewaysCollectors); + doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors); } else { //System.out.println("union"); - doUnionScoring(collector, docsEnums, sidewaysCollectors); + doUnionScoring(collector, disis, sidewaysCollectors); + } + } + + /** Used when base query is highly constraining vs the + * drilldowns, or when the docs must be scored at once + * (i.e., like BooleanScorer2, not BooleanScorer). In + * this case we just .next() on base and .advance() on + * the dim filters. */ + private void doQueryFirstScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors, + Bits[] bits, Collector[] bitsSidewaysCollectors) throws IOException { + //if (DEBUG) { + // System.out.println(" doQueryFirstScoring"); + //} + int docID = baseScorer.docID(); + + nextDoc: while (docID != NO_MORE_DOCS) { + Collector failedCollector = null; + for (int i=0;i docID) { + if (failedCollector != null) { + // More than one dim fails on this document, so + // it's neither a hit nor a near-miss; move to + // next doc: + docID = baseScorer.nextDoc(); + continue nextDoc; + } else { + failedCollector = sidewaysCollectors[i]; + } + } + } + + // TODO: for the "non-costly Bits" we really should + // have passed them down as acceptDocs, but + // unfortunately we cannot distinguish today between + // "bits() is so costly that you should apply it last" + // from "bits() is so cheap that you should apply it + // everywhere down low" + + // Fold in Filter Bits last, since they may be costly: + for(int i=0;i= dim) { @@ -299,8 +383,9 @@ counts[slot] = dim+1; } } + // TODO: sometimes use advance?
- docID = docsEnum.nextDoc(); + docID = disi.nextDoc(); } } } @@ -309,7 +394,7 @@ //if (DEBUG) { // System.out.println(" now collect: " + filledCount + " hits"); //} - for(int i=0;i { - DocsEnum[] docsEnums; - // Max cost for all docsEnums for this dim: - long maxCost; + static class DocsAndCost implements Comparable { + // Iterator for docs matching this dim's filter, or ... + DocIdSetIterator disi; + // Random access bits: + Bits bits; Collector sidewaysCollector; String dim; @Override - public int compareTo(DocsEnumsAndFreq other) { - if (maxCost < other.maxCost) { + public int compareTo(DocsAndCost other) { + if (disi == null) { + if (other.disi == null) { + return 0; + } else { + return 1; + } + } else if (other.disi == null) { + return -1; + } else if (disi.cost() < other.disi.cost()) { return -1; - } else if (maxCost > other.maxCost) { + } else if (disi.cost() > other.disi.cost()) { return 1; } else { return 0; diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-29 06:40:22.150528890 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java 2014-01-28 06:25:14.868864446 -0500 @@ -26,7 +26,6 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; @@ -99,14 +98,15 @@ return "DoubleRange(" + minIncl + " to " + maxIncl + ")"; } - /** Returns a new {@link Filter} accepting only documents - * in this range. Note that this filter is not - * efficient: it's a linear scan of all docs, testing - * each value. If the {@link ValueSource} is static, - * e.g. 
an indexed numeric field, then it's more - * efficient to use {@link NumericRangeFilter}. */ + @Override public Filter getFilter(final ValueSource valueSource) { return new Filter() { + + @Override + public String toString() { + return "Filter(" + DoubleRange.this.toString(); + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { @@ -122,46 +122,27 @@ return new DocIdSet() { @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - int doc = -1; - + public Bits bits() { + return new Bits() { @Override - public int nextDoc() throws IOException { - while (true) { - doc++; - if (doc == maxDoc) { - return doc = NO_MORE_DOCS; - } - if (acceptDocs != null && acceptDocs.get(doc) == false) { - continue; - } - double v = values.doubleVal(doc); - if (accept(v)) { - return doc; - } + public boolean get(int docID) { + if (acceptDocs != null && acceptDocs.get(docID) == false) { + return false; } + return accept(values.doubleVal(docID)); } @Override - public int advance(int target) throws IOException { - doc = target-1; - return nextDoc(); - } - - @Override - public int docID() { - return doc; - } - - @Override - public long cost() { - // Since we do a linear scan over all - // documents, our cost is O(maxDoc): + public int length() { return maxDoc; } }; } + + @Override + public DocIdSetIterator iterator() { + return new SlowBitsDocIdSetIterator(bits(), maxDoc, acceptDocs); + } }; } }; diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-29 06:40:22.150528890 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java 2014-01-28 06:31:20.560854668 -0500 @@ -26,7 +26,6 @@ import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import 
org.apache.lucene.search.Filter; -import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.util.Bits; /** Represents a range over long values. */ @@ -91,14 +90,15 @@ return "LongRange(" + minIncl + " to " + maxIncl + ")"; } - /** Returns a new {@link Filter} accepting only documents - * in this range. Note that this filter is not - * efficient: it's a linear scan of all docs, testing - * each value. If the {@link ValueSource} is static, - * e.g. an indexed numeric field, then it's more - * efficient to use {@link NumericRangeFilter}. */ + @Override public Filter getFilter(final ValueSource valueSource) { return new Filter() { + + @Override + public String toString() { + return "Filter(" + LongRange.this.toString(); + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { @@ -114,46 +114,27 @@ return new DocIdSet() { @Override - public DocIdSetIterator iterator() { - return new DocIdSetIterator() { - int doc = -1; - + public Bits bits() { + return new Bits() { @Override - public int nextDoc() throws IOException { - while (true) { - doc++; - if (doc == maxDoc) { - return doc = NO_MORE_DOCS; - } - if (acceptDocs != null && acceptDocs.get(doc) == false) { - continue; - } - long v = values.longVal(doc); - if (accept(v)) { - return doc; - } + public boolean get(int docID) { + if (acceptDocs != null && acceptDocs.get(docID) == false) { + return false; } + return accept(values.longVal(docID)); } @Override - public int advance(int target) throws IOException { - doc = target-1; - return nextDoc(); - } - - @Override - public int docID() { - return doc; - } - - @Override - public long cost() { - // Since we do a linear scan over all - // documents, our cost is O(maxDoc): + public int length() { return maxDoc; } }; } + + @Override + public DocIdSetIterator iterator() { + return new SlowBitsDocIdSetIterator(bits(), maxDoc, acceptDocs); + } }; } }; diff -ruN -x .svn -x build 
../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java lucene/facet/src/java/org/apache/lucene/facet/range/Range.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-29 06:40:22.150528890 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/range/Range.java 2014-01-28 05:45:09.260928795 -0500 @@ -17,6 +17,10 @@ * limitations under the License. */ +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; // javadocs + /** Base class for a single labeled range. * * @lucene.experimental */ @@ -33,6 +37,16 @@ this.label = label; } + /** Returns a new {@link Filter} accepting only documents + * in this range. This filter is not general-purpose; + * you should either use it with {@link DrillSideways} by + * adding it to {@link DrillDownQuery#add}, or pass it to + * {@link FilteredQuery} using its {@link + * FilteredQuery#QUERY_FIRST_FILTER_STRATEGY}. If the + * {@link ValueSource} is static, e.g. an indexed numeric + * field, then it may be more efficient to use {@link NumericRangeFilter}. */ + public abstract Filter getFilter(final ValueSource valueSource); + /** Invoke this for a useless range. */ protected void failNoMatch() { throw new IllegalArgumentException("range \"" + label + "\" matches nothing"); diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java --- ../trunk2/lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java 1969-12-31 19:00:00.000000000 -0500 +++ lucene/facet/src/java/org/apache/lucene/facet/range/SlowBitsDocIdSetIterator.java 2014-01-28 06:26:30.508862423 -0500 @@ -0,0 +1,75 @@ +package org.apache.lucene.facet.range; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Bits; + +/** Wraps a {@link Bits} to make a {@link DocIdSetIterator}; + * this is typically a slow thing to do and should only be + * used as a last resort! */ +class SlowBitsDocIdSetIterator extends DocIdSetIterator { + private final Bits bits; + private final long cost; + private final int maxDoc; + private final Bits acceptDocs; + private int doc = -1; + + public SlowBitsDocIdSetIterator(Bits bits, long cost, Bits acceptDocs) { + this.bits = bits; + this.cost = cost; + this.maxDoc = bits.length(); + this.acceptDocs = acceptDocs; + } + + @Override + public int advance(int target) throws IOException { + if (target == NO_MORE_DOCS) { + doc = target; + return doc; + } + doc = target-1; + return nextDoc(); + } + + @Override + public int nextDoc() throws IOException { + assert doc != NO_MORE_DOCS; + while (true) { + doc++; + if (doc == maxDoc) { + return doc = NO_MORE_DOCS; + } + if ((acceptDocs == null || acceptDocs.get(doc)) && bits.get(doc)) { + return doc; + } + } + } + + @Override + public int docID() { + return doc; + } + + @Override + public long cost() { + return cost; + } +} \ No newline at end of file diff -ruN -x .svn -x build 
../trunk2/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java --- ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-29 06:40:22.146528890 -0500 +++ lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java 2014-01-29 06:37:53.082532873 -0500 @@ -51,7 +51,6 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.DoubleDocValues; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; -import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.NumericRangeQuery; @@ -229,6 +228,10 @@ IndexSearcher s = newSearcher(r); + if (VERBOSE) { + System.out.println("TEST: searcher=" + s); + } + DrillSideways ds = new DrillSideways(s, config, tr) { @Override @@ -765,16 +768,13 @@ Document doc = new Document(); writer.addDocument(doc); - - doc = new Document(); writer.addDocument(doc); - - doc = new Document(); writer.addDocument(doc); + // Test wants 3 docs in one segment: writer.forceMerge(1); - ValueSource vs = new ValueSource() { + final ValueSource vs = new ValueSource() { @SuppressWarnings("rawtypes") @Override public FunctionValues getValues(Map ignored, AtomicReaderContext ignored2) { @@ -801,6 +801,8 @@ throw new UnsupportedOperationException(); } }; + + FacetsConfig config = new FacetsConfig(); FacetsCollector fc = new FacetsCollector(); @@ -808,18 +810,44 @@ IndexSearcher s = newSearcher(r); s.search(new MatchAllDocsQuery(), fc); - Facets facets = new DoubleRangeFacetCounts("field", vs, fc, + final DoubleRange[] ranges = new DoubleRange[] { new DoubleRange("< 1", 0.0, true, 1.0, false), new DoubleRange("< 2", 0.0, true, 2.0, false), new DoubleRange("< 5", 0.0, true, 5.0, false), new DoubleRange("< 10", 0.0, true, 
10.0, false), new DoubleRange("< 20", 0.0, true, 20.0, false), - new DoubleRange("< 50", 0.0, true, 50.0, false)); + new DoubleRange("< 50", 0.0, true, 50.0, false)}; + + Facets facets = new DoubleRangeFacetCounts("field", vs, fc, ranges); assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", facets.getTopChildren(10, "field").toString()); - // Test drill-down: - assertEquals(1, s.search(new ConstantScoreQuery(new DoubleRange("< 2", 0.0, true, 2.0, false).getFilter(vs)), 10).totalHits); + DrillDownQuery ddq = new DrillDownQuery(config); + ddq.add("field", ranges[1].getFilter(vs)); + + // Test simple drill-down: + assertEquals(1, s.search(ddq, 10).totalHits); + + // Test drill-sideways after drill-down + DrillSideways ds = new DrillSideways(s, config, (TaxonomyReader) null) { + + @Override + protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { + assert drillSideways.length == 1; + return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ranges); + } + + @Override + protected boolean scoreSubDocsAtOnce() { + return random().nextBoolean(); + } + }; + + + DrillSidewaysResult dsr = ds.search(ddq, 10); + assertEquals(1, dsr.hits.totalHits); + assertEquals("dim=field path=[] value=3 childCount=6\n < 1 (0)\n < 2 (1)\n < 5 (3)\n < 10 (3)\n < 20 (3)\n < 50 (3)\n", + dsr.facets.getTopChildren(10, "field").toString()); IOUtils.close(r, writer, dir); } diff -ruN -x .svn -x build ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java --- ../trunk2/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java 2014-01-29 06:40:22.146528890 -0500 +++ lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java 2014-01-27 05:25:54.775270973 -0500 @@ -644,7 +644,7 @@ final FixedBitSet bits = new 
FixedBitSet(maxDoc); for(int docID=0;docID < maxDoc;docID++) { // Keeps only the even ids: - if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) { + if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) { bits.set(docID); } } @@ -688,7 +688,7 @@ // subScorers are on the same docID: if (!anyMultiValuedDrillDowns) { // Can only do this test when there are no OR'd - // drill-down values, beacuse in that case it's + // drill-down values, because in that case it's // easily possible for one of the DD terms to be on // a future docID: new DrillSideways(s, config, tr) {