Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (revision 0) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (working copy) @@ -0,0 +1,170 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.facet.util.PrintTaxonomyStats; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +/** Exercises {@link DrillSideways} on a small index with Author and Publish Date facet dimensions. */ +public class TestDrillSideways extends FacetTestCase { + + private DirectoryTaxonomyWriter taxoWriter; + private RandomIndexWriter writer; + private FacetFields facetFields; + /** Indexes one document carrying the given facet categories; each string is split on '/' into a {@link CategoryPath}. */ + private void add(String ... 
categoryPaths) throws IOException { + Document doc = new Document(); + List paths = new ArrayList(); + for(String categoryPath : categoryPaths) { + paths.add(new CategoryPath(categoryPath, '/')); + } + facetFields.addFields(doc, paths); + writer.addDocument(doc); + } + /** Indexes five documents, then checks hit counts and per-dimension facet counts for three drill-sideways searches: one drill-down dimension, two dimensions, and two dimensions where one is an OR of two values. */ + public void test() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("Author/Bob", "Publish Date/2010/10/15"); + add("Author/Lisa", "Publish Date/2010/10/20"); + add("Author/Lisa", "Publish Date/2012/1/1"); + add("Author/Susan", "Publish Date/2012/1/7"); + add("Author/Frank", "Publish Date/1999/5/5"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Count both "Publish Date" and "Author" dimensions, in + // drill-down: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Author"), 10)); + + // Simple case: drill-down on a single field; in this + // case the drill-sideways + drill-down counts == + // drill-down of just the query: + DrillSideways ds = new DrillSideways(searcher, taxoReader, fsp.indexingParams); + ds.addDrillDown(new CategoryPath("Author", "Lisa")); + DrillSidewaysResult r = ds.search(new MatchAllDocsQuery(), 10, fsp); + + assertEquals(2, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + 
assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1))); + + // More interesting case: drill-down on two fields + ds = new DrillSideways(searcher, taxoReader, fsp.indexingParams); + ds.addDrillDown(new CategoryPath("Author", "Lisa")); + ds.addDrillDown(new CategoryPath("Publish Date", "2010")); + r = ds.search(new MatchAllDocsQuery(), 10, fsp); + assertEquals(1, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is drill-sideways + drill-down: Lisa + // (drill-down) published once in 2010 and once in 2012: + assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + // Even more interesting case: drill down on two fields, + // but one of them is OR + ds = new DrillSideways(searcher, taxoReader, fsp.indexingParams); + + // Drill down on Lisa or Bob: + ds.addDrillDown(new CategoryPath("Author", "Lisa"), + new CategoryPath("Author", "Bob")); + ds.addDrillDown(new CategoryPath("Publish Date", "2010")); + r = ds.search(new MatchAllDocsQuery(), 10, fsp); + assertEquals(2, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is both drill-sideways + drill-down: + // Lisa or Bob published twice in 2010 and once in 2012: + assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + /** Just gathers counts of values 
under the dim, rendered as "label: value=count value=count ...". */ + private String toString(FacetResult fr) { + StringBuilder b = new StringBuilder(); + FacetResultNode node = fr.getFacetResultNode(); + b.append(node.label); + b.append(":"); + for(FacetResultNode childNode : node.subResults) { + b.append(' '); + b.append(childNode.label.components[1]); + b.append('='); + b.append((int) childNode.value); + } + return b.toString(); + } +} + Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (revision 1443165) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (working copy) @@ -75,6 +75,8 @@ * NOTE: {@code baseQuery} can be {@code null}, in which case only the * {@link Query} over the categories will is returned. */ + // nocommit how does one drill down on one field AND drill + // down on OR of other fields? public static final Query query(FacetIndexingParams iParams, Query baseQuery, Occur occur, CategoryPath... paths) { if (paths == null || paths.length == 0) { throw new IllegalArgumentException("Empty category path not allowed for drill down query!"); Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (working copy) @@ -0,0 +1,356 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import 
org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; + +// nocommit explain javadocs that this allocates N facet +// arrays under the hood so if you have many ords... + +// TODO: maybe open up a way to pass "prior drill down +// results" for the newly drill down dim? Add serious hair +// (plus app must round-trip this in a stateless server) +// but should be good perf win ... + +// nocommit merge with DrillDown? + +/** + @lucene.experimental + */ +public final class DrillSideways { + + private final IndexSearcher searcher; + private final TaxonomyReader taxoReader; + private final List drillDownQueries = new ArrayList(); + private final Map drillDownDims = new LinkedHashMap(); + private final FacetIndexingParams fip; + + public DrillSideways(IndexSearcher searcher, TaxonomyReader taxoReader, FacetIndexingParams fip) { + this.searcher = searcher; + this.taxoReader = taxoReader; + this.fip = fip; + } + + /** Adds one dimension of drill downs; if you pass + * multiple values they are OR'd. + * + * @throws IllegalArgumentException if no paths are given, + * if any path is empty, if OR'd paths are not all under + * the same dimension, or if the dimension was already added */ + public void addDrillDown(CategoryPath... 
paths) { + // Reject a missing/empty varargs array up front instead of + // failing on paths[0] below: + if (paths == null || paths.length == 0) { + throw new IllegalArgumentException("at least one CategoryPath is required for a drill-down"); + } + Query q; + if (paths[0].length == 0) { + throw new IllegalArgumentException("all CategoryPaths must have length > 0"); + } + String dim = paths[0].components[0]; + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension '" + dim + "' was already added"); + } + if (paths.length == 1) { + q = new TermQuery(DrillDown.term(fip, paths[0])); + } else { + BooleanQuery bq = new BooleanQuery(true); // disable coord + for (CategoryPath cp : paths) { + if (cp.length == 0) { + throw new IllegalArgumentException("all CategoryPaths must have length > 0"); + } + if (!cp.components[0].equals(dim)) { + throw new IllegalArgumentException("multiple (OR'd) drill-down paths must be under same dimension; got '" + dim + "' and '" + cp.components[0] + "'"); + } + bq.add(new TermQuery(DrillDown.term(fip, cp)), Occur.SHOULD); + } + q = bq; + } + drillDownDims.put(dim, drillDownDims.size()); + + final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q); + drillDownQuery.setBoost(0.0f); + drillDownQueries.add(drillDownQuery); + } + + // nocommit also take "after"? + public DrillSidewaysResult search(Query query, Collector hitCollector, FacetSearchParams fsp) throws IOException { + + if (drillDownQueries.size() < 1) { + throw new IllegalStateException("there must be at least one drill-down"); + } + + for(int i=0;i 0"); + } + } + BooleanQuery topQuery = new BooleanQuery(); + topQuery.add(query, BooleanClause.Occur.MUST); + + BooleanQuery subQuery = new BooleanQuery(); + topQuery.add(subQuery, BooleanClause.Occur.MUST); + for(Query drillDownQuery : drillDownQueries) { + subQuery.add(drillDownQuery, BooleanClause.Occur.SHOULD); + } + + // TODO: we could better optimize the "just one drill + // down" case w/ a separate [specialized] + // collector... 
+ + int minShouldMatch = drillDownQueries.size()-1; + if (drillDownQueries.size() == 1) { + // Must add another "fake" clause so BQ doesn't erase + // itself by rewriting to the single clause: + Query end = new MatchAllDocsQuery(); + end.setBoost(0.0f); + subQuery.add(end, BooleanClause.Occur.SHOULD); + minShouldMatch++; + } + subQuery.setMinimumNumberShouldMatch(minShouldMatch); + + DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownDims, fsp, searcher, taxoReader); + + searcher.search(topQuery, collector); + + List drillDownResults = collector.drillDownCollector.getFacetResults(); + + List mergedResults = new ArrayList(); + for(int i=0;i 0; + Integer idx = drillDownDims.get(fr.categoryPath.components[0]); + if (idx == null) { + // Pure drill down dim (the current query didn't + // drill down on this dim): + mergedResults.add(drillDownResults.get(i)); + } else { + // Drill sideways dim: + List sidewaysResult = collector.drillSidewaysCollectors[idx.intValue()].getFacetResults(); + assert sidewaysResult.size() == 1; + mergedResults.add(sidewaysResult.get(0)); + } + } + + return new DrillSidewaysResult(mergedResults, null); + } + + /** Searches with an optional filter and sort. A non-null + * {@code filter} is applied by wrapping {@code query} in a + * {@link FilteredQuery}. When {@code sort} is null this + * delegates to the relevance-sorted search and + * {@code doDocScores} / {@code doMaxScore} are not used. */ + public DrillSidewaysResult search(Query query, Filter filter, int topN, Sort sort, boolean doDocScores, boolean doMaxScore, FacetSearchParams fsp) throws IOException { + if (filter != null) { + query = new FilteredQuery(query, filter); + } + if (sort != null) { + // Clamp the queue to maxDoc with min (not max), so at + // most topN hits are kept; same clamp as the + // relevance-sorted search: + final TopFieldCollector hitCollector = TopFieldCollector.create(sort, + Math.min(topN, searcher.getIndexReader().maxDoc()), + null, + true, + doDocScores, + doMaxScore, + true); + DrillSidewaysResult r = search(query, hitCollector, fsp); + r.hits = hitCollector.topDocs(); + return r; + } else { + return search(query, topN, fsp); + } + } + + public DrillSidewaysResult search(Query query, int topN, FacetSearchParams fsp) throws IOException { + TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, 
searcher.getIndexReader().maxDoc()), null, true); + DrillSidewaysResult r = search(query, hitCollector, fsp); + r.hits = hitCollector.topDocs(); + return r; + } + + private static class DrillSidewaysCollector extends Collector { + + private final Collector hitCollector; + private final FacetsCollector drillDownCollector; + private final FacetsCollector[] drillSidewaysCollectors; + private final Scorer[] subScorers; + private final int exactCount; + + private Scorer scorer; + private Scorer mainScorer; + + /** Creates the drill-down collector for all requests in + * {@code fsp}, plus one single-request FacetsCollector per + * drill-down dimension in {@code dims}. + * + * @throws IllegalArgumentException if a dimension in + * {@code dims} has no matching FacetRequest in {@code fsp}, + * or has more than one */ + public DrillSidewaysCollector(Collector hitCollector, Map dims, FacetSearchParams fsp, IndexSearcher searcher, TaxonomyReader taxoReader) { + this.hitCollector = hitCollector; + drillDownCollector = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader); + subScorers = new Scorer[dims.size()]; + + if (dims.size() == 1) { + // When we have only one dim, we insert the + // MatchAllDocsQuery, bringing the clause count to + // 2: + exactCount = 2; + } else { + exactCount = dims.size(); + } + + // nocommit: we should be able to share a single + // FacetArrays across all of these + // collectors because the ords will be orthogonal + // ... problem is we'd have to determine at decode + // time whether the ord falls under the dim we must + // count for this one hit ... if app has only one dim + // in each CLP then we could do that w/o the if! 
+ drillSidewaysCollectors = new FacetsCollector[dims.size()]; + int idx = 0; + for(String dim : dims.keySet()) { + FacetRequest drillSidewaysRequest = null; + for(FacetRequest fr : fsp.facetRequests) { + assert fr.categoryPath.length > 0; + if (fr.categoryPath.components[0].equals(dim)) { + if (drillSidewaysRequest != null) { + throw new IllegalArgumentException("multiple FacetRequests for drill-sideways dimension \"" + dim + "\""); + } + drillSidewaysRequest = fr; + } + } + if (drillSidewaysRequest == null) { + throw new IllegalArgumentException("could not find FacetRequest for drill-sideways dimension \"" + dim + "\""); + } + drillSidewaysCollectors[idx++] = FacetsCollector.create(new FacetSearchParams(fsp.indexingParams, drillSidewaysRequest), + searcher.getIndexReader(), taxoReader); + } + } + + @Override + public void collect(int doc) throws IOException { + //System.out.println("collect doc=" + doc + " main.freq=" + mainScorer.freq() + " main.doc=" + mainScorer.docID() + " exactCount=" + exactCount); + if (mainScorer.freq() == exactCount) { + // All sub-clauses from the drill-down filters + // matched, so this is a "real" hit, so we first + // collect in both the hitCollector and the + // drillDown collector: + //System.out.println(" hit"); + hitCollector.collect(doc); + drillDownCollector.collect(doc); + + // Also collect across all drill-sideways counts so + // we "merge in" drill-down counts for this + // dimension. + for(int i=0;i doc; + drillSidewaysCollectors[i].collect(doc); + assert allMatchesFrom(i+1, doc); + // nocommit assert "the rest" match + break; + } + } + } + } + + // Only used by assert: + private boolean allMatchesFrom(int startFrom, int doc) { + for(int i=startFrom;i