Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (revision 1441819) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDown.java (working copy) @@ -75,6 +75,8 @@ * NOTE: {@code baseQuery} can be {@code null}, in which case only the * {@link Query} over the categories will is returned. */ + // nocommit how does one drill down on one field AND drill + // down on OR of other fields? public static final Query query(FacetIndexingParams iParams, Query baseQuery, Occur occur, CategoryPath... paths) { if (paths == null || paths.length == 0) { throw new IllegalArgumentException("Empty category path not allowed for drill down query!"); Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (working copy) @@ -0,0 +1,224 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; + +// nocommit we need per-dim control of AND vs OR, eg maybe i +// drill down on Price < 100 AND (Brand = 'Sony' OR Brand = +// 'Nikon'); really each dim should be list of CPs...? 
+ +/** Runs a base query together with drill-down category constraints in a single search and reports the hits, the drill-down facet results and one drill-sideways facet result per drill-down path. */ public final class DrillSideways { + + /** Collector used by search: holds the hit collector, one drill-down FacetsCollector and one drill-sideways FacetsCollector per path. NOTE(review): the constructor loop, collect and the tail of setScorer are truncated in this copy of the patch; restore them from the original before applying. */ private static class DrillSidewaysCollector extends Collector { + + private final Collector hitCollector; + private final CategoryPath[] paths; + private final FacetsCollector drillDownCollector; + private final FacetsCollector[] drillSidewaysCollectors; + private final Scorer[] subScorers; + private final int exactCount; + + private Scorer scorer; + private Scorer mainScorer; + + public DrillSidewaysCollector(Collector hitCollector, CategoryPath[] paths, FacetSearchParams fsp, IndexSearcher searcher, TaxonomyReader taxoReader) { + this.hitCollector = hitCollector; + this.paths = paths; + drillDownCollector = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader); + subScorers = new Scorer[paths.length]; + exactCount = Math.max(2, paths.length); + + // nocommit: we should be able to share a single + // FacetArrays across all of these + // collectors because the ords will be orthogonal + // ... problem is we'd have to determine at decode + // time whether the ord falls under the dim we must + // count for this one hit ...
tricky + drillSidewaysCollectors = new FacetsCollector[paths.length]; + for(int i=0;i doc; + drillSidewaysCollectors[i].collect(doc); + break; + } + } + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + // We actually could accept docs out of order, but, we + // need to force BooleanScorer2 so that the + // sub-scorers are "on" each docID we are collecting: + return false; + } + + /** Forwards the new segment to the hit collector, the drill-down collector and every drill-sideways collector. */ @Override + public void setNextReader(AtomicReaderContext leaf) throws IOException { + hitCollector.setNextReader(leaf); + drillDownCollector.setNextReader(leaf); + for(Collector dsc : drillSidewaysCollectors) { + dsc.setNextReader(leaf); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + + //System.out.println(" scorer=" + scorer); + // nocommit fragile to rely on this being List: + List childScorers = (List) scorer.getChildren(); + assert childScorers.size() == 2; + //System.out.println("childScorers=" + childScorers.get(1).child); + + // nocommit fragile: need tracker somehow... + mainScorer = childScorers.get(1).child; + + // nocommit fragile to rely on this being List: + List childSubScorers = (List) mainScorer.getChildren(); + for(int i=0;i drillDownResults; + public final List drillSidewaysResults; + public final TopDocs hits; + + DrillSidewaysResult(List drillDownResults, List drillSidewaysResults, TopDocs hits) { + this.drillDownResults = drillDownResults; + this.drillSidewaysResults = drillSidewaysResults; + this.hits = hits; + } + } + + // nocommit also take Sort, Collector + /** Searches mainQuery under the given drill-down paths and gathers facet counts in the same pass. The query built here MUSTs mainQuery and a nested BooleanQuery holding one zero-boost constant-score SHOULD clause per path; minimumNumberShouldMatch is set to paths-1 (or to 1 with an extra zero-boost match-all clause when there is a single path), so documents that miss at most one drill-down constraint still reach the collector. @param topN maximum number of hits to collect, capped at maxDoc (treated as at least 1 so an empty index does not request a zero-sized hit collector). @param drillDownPaths the categories to drill down on; must contain at least one path. @return the drill-down facet results, one drill-sideways FacetResult per path in path order, and the top hits. @throws IllegalArgumentException if drillDownPaths is null or empty. */ public static DrillSidewaysResult search(TaxonomyReader taxoReader, IndexSearcher searcher, + int topN, Query mainQuery, FacetSearchParams fsp, + CategoryPath...
drillDownPaths) throws IOException { + + if (drillDownPaths == null || drillDownPaths.length < 1) { + throw new IllegalArgumentException("there must be at least one drilldown CategoryPath"); + } + + // nocommit must validate that each dim only appears + // "once" + + // nocommit use TermsFilter for dims that OR multiple + // terms... + + // nocommit add a "just the filter only" API in + // DrillDown and share it + + BooleanQuery topQuery = new BooleanQuery(); + topQuery.add(mainQuery, BooleanClause.Occur.MUST); + + BooleanQuery subQuery = new BooleanQuery(); + topQuery.add(subQuery, BooleanClause.Occur.MUST); + for(CategoryPath cp : drillDownPaths) { + TermQuery tq = new TermQuery(DrillDown.term(fsp.indexingParams, cp)); + Query csq = new ConstantScoreQuery(tq); + csq.setBoost(0.0f); + subQuery.add(csq, BooleanClause.Occur.SHOULD); + } + if (drillDownPaths.length == 1) { + // TODO: we could better optimize the "just one drill + // down" case w/ a separate [specialized] collector... + // Must add another clause so BQ doesn't erase itself + // by rewriting to the single clause: + Query end = new MatchAllDocsQuery(); + end.setBoost(0.0f); + subQuery.add(end, BooleanClause.Occur.SHOULD); + subQuery.setMinimumNumberShouldMatch(drillDownPaths.length); + } else { + subQuery.setMinimumNumberShouldMatch(drillDownPaths.length-1); + } + + /* Cap topN at maxDoc, but never below 1: an empty index has maxDoc 0 and the collector rejects a hit count of 0. */ TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, Math.max(1, searcher.getIndexReader().maxDoc())), null, false); + + DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownPaths, fsp, searcher, taxoReader); + + searcher.search(topQuery, collector); + + List drillSidewaysResults = new ArrayList(); + for(FacetsCollector c : collector.drillSidewaysCollectors) { + List dimResult = c.getFacetResults(); + // Each drill sideways result only counts one dimension: + assert dimResult.size() == 1; + drillSidewaysResults.add(dimResult.get(0)); + } + return new DrillSidewaysResult(collector.drillDownCollector.getFacetResults(),
drillSidewaysResults, + hitCollector.topDocs()); + } +} + Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (revision 0) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (working copy) @@ -0,0 +1,153 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.facet.util.PrintTaxonomyStats; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; + +/** Exercises DrillSideways.search against a five-document index carrying Author and Publish Date categories. */ public class TestDrillSideways extends FacetTestCase { + + private DirectoryTaxonomyWriter taxoWriter; + private RandomIndexWriter writer; + private FacetFields facetFields; + + /** Indexes one document whose facet fields are built from the given slash-delimited category paths. Relies on writer and facetFields having been initialised by the calling test. */ private void add(String ...
categoryPaths) throws IOException { + Document doc = new Document(); + List paths = new ArrayList(); + for(String categoryPath : categoryPaths) { + paths.add(new CategoryPath(categoryPath, '/')); + } + facetFields.addFields(doc, paths); + writer.addDocument(doc); + } + + /** Builds the index and taxonomy, then checks hit counts, drill-down counts and drill-sideways counts for a one-path drill-down (Author/Lisa) and a two-path drill-down (Author/Lisa plus Publish Date/2010). */ public void test() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("Author/Bob", "Publish Date/2010/10/15"); + add("Author/Lisa", "Publish Date/2010/10/20"); + add("Author/Lisa", "Publish Date/2012/1/1"); + add("Author/Susan", "Publish Date/2012/1/7"); + add("Author/Frank", "Publish Date/1999/5/5"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Count both "Publish Date" and "Author" dimensions, in + // drill down: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Author"), 10)); + + // Simple case: drill down on a single field; in this + // case the drill sideways + drill down counts == drill + // down of just the query: + DrillSidewaysResult r = DrillSideways.search(taxoReader, searcher, 10, new MatchAllDocsQuery(), fsp, + new CategoryPath("Author", "Lisa")); + assertEquals(2, r.hits.totalHits); + //System.out.println(r.drillDownResults); + //System.out.println(r.drillSidewaysResults); + assertEquals(2, r.drillDownResults.size()); + // nocommit why sorting by label descending...?
+ assertEquals("Publish Date: 2012=1 2010=1", toString(r.drillDownResults.get(0))); + assertEquals("Author: Lisa=2", toString(r.drillDownResults.get(1))); + + assertEquals(1, r.drillSidewaysResults.size()); + // nocommit how are these sorted...? + assertEquals("Author: Frank=1 Susan=1 Bob=1", toString(r.drillSidewaysResults.get(0))); + + + // More interesting case: drill down on two fields + r = DrillSideways.search(taxoReader, searcher, 10, new MatchAllDocsQuery(), fsp, + new CategoryPath("Author", "Lisa"), + new CategoryPath("Publish Date", "2010")); + assertEquals(1, r.hits.totalHits); + assertEquals(2, r.drillDownResults.size()); + assertEquals("Publish Date: 2010=1", toString(r.drillDownResults.get(0))); + assertEquals("Author: Lisa=1", toString(r.drillDownResults.get(1))); + assertEquals(2, r.drillSidewaysResults.size()); + // Only Bob also had Publish Date=2010 + assertEquals("Author: Bob=1", toString(r.drillSidewaysResults.get(0))); + // Lisa only had 2012 as the other Publish Date: + assertEquals("Publish Date: 2012=1", toString(r.drillSidewaysResults.get(1))); + + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + /** Renders a facet result as the root label, a colon, then one space-separated child=count entry per sub-result, using the second component of each child label and the value cast to int. */ + private String toString(FacetResult fr) { + StringBuilder b = new StringBuilder(); + FacetResultNode node = fr.getFacetResultNode(); + b.append(node.label); + b.append(":"); + for(FacetResultNode childNode : node.subResults) { + b.append(' '); + b.append(childNode.label.components[1]); + b.append('='); + b.append((int) childNode.value); + } + return b.toString(); + } +} + Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property