Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (revision 1476659) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (working copy) @@ -120,12 +120,14 @@ new CountFacetRequest(new CategoryPath("Publish Date"), 10), new CountFacetRequest(new CategoryPath("Author"), 10)); + DrillSideways ds = new DrillSideways(searcher, taxoReader); + // Simple case: drill-down on a single field; in this // case the drill-sideways + drill-down counts == // drill-down of just the query: DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); ddq.add(new CategoryPath("Author", "Lisa")); - DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + DrillSidewaysResult r = ds.search(null, ddq, 10, fsp); assertEquals(2, r.hits.totalHits); assertEquals(2, r.facetResults.size()); @@ -143,7 +145,7 @@ // just the query: ddq = new DrillDownQuery(fsp.indexingParams); ddq.add(new CategoryPath("Author", "Lisa")); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(2, r.hits.totalHits); assertEquals(2, r.facetResults.size()); @@ -162,7 +164,7 @@ // but OR of two values ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); ddq.add(new CategoryPath("Author", "Lisa"), new CategoryPath("Author", "Bob")); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(3, r.hits.totalHits); assertEquals(2, r.facetResults.size()); // Publish Date is only drill-down: Lisa and Bob @@ -177,7 +179,7 @@ ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); ddq.add(new CategoryPath("Author", "Lisa")); ddq.add(new CategoryPath("Publish Date", "2010")); - r = new 
DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(1, r.hits.totalHits); assertEquals(2, r.facetResults.size()); // Publish Date is drill-sideways + drill-down: Lisa @@ -195,7 +197,7 @@ ddq.add(new CategoryPath("Author", "Lisa"), new CategoryPath("Author", "Bob")); ddq.add(new CategoryPath("Publish Date", "2010")); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(2, r.hits.totalHits); assertEquals(2, r.facetResults.size()); // Publish Date is both drill-sideways + drill-down: @@ -211,7 +213,7 @@ fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("Publish Date"), 10), new CountFacetRequest(new CategoryPath("Foobar"), 10)); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(0, r.hits.totalHits); assertEquals(2, r.facetResults.size()); assertEquals("Publish Date:", toString(r.facetResults.get(0))); @@ -224,7 +226,7 @@ fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("Publish Date"), 10), new CountFacetRequest(new CategoryPath("Author"), 10)); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(2, r.hits.totalHits); assertEquals(2, r.facetResults.size()); // Publish Date is only drill-down, and Lisa published @@ -242,7 +244,7 @@ new CategoryPath("Author", "Tom")); fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("Publish Date"), 10)); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(2, r.hits.totalHits); assertEquals(1, r.facetResults.size()); // Publish Date is only drill-down, and Lisa published @@ -255,7 +257,7 @@ new CountFacetRequest(new CategoryPath("Author"), 10)); ddq = new DrillDownQuery(fsp.indexingParams, new TermQuery(new Term("foobar", "baz"))); 
ddq.add(new CategoryPath("Author", "Lisa")); - r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp); + r = ds.search(null, ddq, 10, fsp); assertEquals(0, r.hits.totalHits); assertEquals(2, r.facetResults.size()); Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysCollector.java (working copy) @@ -0,0 +1,174 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.IdentityHashMap; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; + +/** Collector that scrutinizes each hit to determine if it + * passed all constraints (a true hit) or if it missed + * exactly one dimension (a near-miss, to count for + * drill-sideways counts on that dimension). */ +class DrillSidewaysCollector extends Collector { + + private final Collector hitCollector; + private final Collector drillDownCollector; + private final Collector[] drillSidewaysCollectors; + private final Scorer[] subScorers; + private final int exactCount; + + // Maps Weight to either -1 (mainQuery) or to integer + // index of the dims drillDown. We need this when + // visiting the child scorers to correlate back to the + // right scorers: + private final Map weightToIndex = new IdentityHashMap(); + + private Scorer mainScorer; + + public DrillSidewaysCollector(Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors, + Map dims) { + this.hitCollector = hitCollector; + this.drillDownCollector = drillDownCollector; + this.drillSidewaysCollectors = drillSidewaysCollectors; + subScorers = new Scorer[dims.size()]; + + if (dims.size() == 1) { + // When we have only one dim, we insert the + // MatchAllDocsQuery, bringing the clause count to + // 2: + exactCount = 2; + } else { + exactCount = dims.size(); + } + } + + @Override + public void collect(int doc) throws IOException { + //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount); + + if (mainScorer == null) { + // This segment did not have any docs with any + // drill-down field & value: + return; + } + + if (mainScorer.freq() == 
exactCount) { + // All sub-clauses from the drill-down filters + // matched, so this is a "real" hit, so we first + // collect in both the hitCollector and the + // drillDown collector: + //System.out.println(" hit " + drillDownCollector); + hitCollector.collect(doc); + drillDownCollector.collect(doc); + + // Also collect across all drill-sideways counts so + // we "merge in" drill-down counts for this + // dimension. + for(int i=0;i doc: "subDoc=" + subDoc + " doc=" + doc; + drillSidewaysCollectors[i].collect(doc); + assert allMatchesFrom(i+1, doc); + break; + } + } + } + } + + // Only used by assert: + private boolean allMatchesFrom(int startFrom, int doc) { + for(int i=startFrom;i drillDownDims = query.getDims(); if (drillDownDims.isEmpty()) { - // Just do ordinary search: + // Just do ordinary search when there are no drill-downs: FacetsCollector c = FacetsCollector.create(getDrillDownAccumulator(fsp)); searcher.search(query, MultiCollector.wrap(hitCollector, c)); return new DrillSidewaysResult(c.getFacetResults(), null); @@ -171,25 +173,6 @@ startClause = 1; } - Term[][] drillDownTerms = new Term[clauses.length-startClause][]; - for(int i=startClause;i[] drillSidewaysResults = new List[numDims]; List drillDownResults = null; @@ -230,7 +253,9 @@ // Lazy init, in case all requests were against // drill-sideways dims: drillDownResults = drillDownCollector.getFacetResults(); + //System.out.println("get DD results"); } + //System.out.println("add dd results " + i); mergedResults.add(drillDownResults.get(i)); } else { // Drill sideways dim: @@ -250,6 +275,93 @@ return new DrillSidewaysResult(mergedResults, null); } + /** Uses the more general but slower method of sideways + * counting. This method allows an arbitrary subQuery to + * implement the drill down for a given dimension. 
*/ + private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException { + + BooleanClause[] clauses = ddq.getBooleanQuery().getClauses(); + + Map drillDownDims = ddq.getDims(); + + BooleanQuery topQuery = new BooleanQuery(true); + final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors, + drillDownDims); + + // TODO: if query is already a BQ we could copy that and + // add clauses to it, instead of doing BQ inside BQ + // (should be more efficient)? Problem is this can + // affect scoring (coord) ... too bad we can't disable + // coord on a clause by clause basis: + topQuery.add(baseQuery, BooleanClause.Occur.MUST); + + // NOTE: in theory we could just make a single BQ, with + // +query a b c minShouldMatch=2, but in this case, + // annoyingly, BS2 wraps a sub-scorer that always + // returns 2 as the .freq(), not how many of the + // SHOULD clauses matched: + BooleanQuery subQuery = new BooleanQuery(true); + + Query wrappedSubQuery = new QueryWrapper(subQuery, + new SetWeight() { + @Override + public void set(Weight w) { + collector.setWeight(w, -1); + } + }); + Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery); + + // Don't impact score of original query: + constantScoreSubQuery.setBoost(0.0f); + + topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST); + + // Unfortunately this sub-BooleanQuery + // will never get BS1 because today BS1 only works + // if topScorer=true... 
and actually we cannot use BS1 + // anyways because we need subDocsScoredAtOnce: + int dimIndex = 0; + for(int i=startClause;i 1 && (dims[1].maxCost < baseQueryCost/10)) { - //System.out.println("drillDownAdvance"); - doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors); - } else { - //System.out.println("union"); - doUnionScoring(collector, docsEnums, sidewaysCollectors); - } - } else { - // TODO: we should fallback to BS2 ReqOptSum scorer here + if (baseQueryCost < drillDownCost/10) { //System.out.println("baseAdvance"); doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors); + } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) { + //System.out.println("drillDownAdvance"); + doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors); + } else { + //System.out.println("union"); + doUnionScoring(collector, docsEnums, sidewaysCollectors); } } Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (revision 1476659) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (working copy) @@ -167,11 +167,25 @@ } q = bq; } - drillDownDims.put(dim, drillDownDims.size()); - final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q); + add(dim, q); + } + + /** Expert: add a custom drill-down subQuery. Use this + * when you have a separate way to drill-down on the + * dimension than the indexed facet ordinals. */ + public void add(String dim, Query subQuery) { + + // TODO: we should use FilteredQuery? + + // So scores of the drill-down query don't have an + // effect: + final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subQuery); drillDownQuery.setBoost(0.0f); + query.add(drillDownQuery, Occur.MUST); + + drillDownDims.put(dim, drillDownDims.size()); } @Override