Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (revision 0)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (working copy)
@@ -0,0 +1,169 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.facet.FacetTestCase;
+import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
+import org.apache.lucene.facet.search.params.CountFacetRequest;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.search.results.FacetResult;
+import org.apache.lucene.facet.search.results.FacetResultNode;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.facet.util.PrintTaxonomyStats;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Exercises {@link DrillSideways#search} on a small five-document index,
+ * checking both the drill-down counts and the drill-sideways counts.
+ */
+public class TestDrillSideways extends FacetTestCase {
+
+  private DirectoryTaxonomyWriter taxoWriter;
+  private RandomIndexWriter writer;
+  private FacetFields facetFields;
+
+  /**
+   * Indexes one document carrying the given categories.
+   *
+   * @param categoryPaths category paths written with '/' between components,
+   *        e.g. {@code "Author/Bob"}
+   * @throws IOException propagated from the facet-field or document writes
+   */
+  private void add(String ... categoryPaths) throws IOException {
+    Document doc = new Document();
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(String categoryPath : categoryPaths) {
+      paths.add(new CategoryPath(categoryPath, '/'));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+  }
+
+  /**
+   * Drills down on one dimension, then on two, and checks the per-dimension
+   * counts reported for each case.
+   */
+  public void test() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    writer = new RandomIndexWriter(random(), dir);
+
+    // Writes facet ords to a separate directory from the
+    // main index:
+    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    // Reused across documents, to add the necessary facet
+    // fields:
+    facetFields = new FacetFields(taxoWriter);
+
+    add("Author/Bob", "Publish Date/2010/10/15");
+    add("Author/Lisa", "Publish Date/2010/10/20");
+    add("Author/Lisa", "Publish Date/2012/1/1");
+    add("Author/Susan", "Publish Date/2012/1/7");
+    add("Author/Frank", "Publish Date/1999/5/5");
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    //System.out.println("searcher=" + searcher);
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    // Count both "Publish Date" and "Author" dimensions, in
+    // drill down:
+    FacetSearchParams fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10),
+        new CountFacetRequest(new CategoryPath("Author"), 10));
+
+    // Simple case: drill down on a single field; in this
+    // case the drill sideways + drill down counts == drill
+    // down of just the query:
+    DrillSidewaysResult r = DrillSideways.search(taxoReader, searcher, 10, new MatchAllDocsQuery(), fsp,
+                                                 new CategoryPath("Author", "Lisa"));
+    //System.out.println(r.drillDownResults);
+    //System.out.println(r.drillSidewaysResults);
+    assertEquals(2, r.drillDownResults.size());
+    // nocommit why sorting by label descending...?
+    assertEquals("Publish Date: 2012=1 2010=1", toString(r.drillDownResults.get(0)));
+    assertEquals("Author: Lisa=2", toString(r.drillDownResults.get(1)));
+
+    assertEquals(1, r.drillSidewaysResults.size());
+    // nocommit how are these sorted...?
+    assertEquals("Author: Frank=1 Susan=1 Bob=1", toString(r.drillSidewaysResults.get(0)));
+
+
+    // More interesting case: drill down on two fields
+    r = DrillSideways.search(taxoReader, searcher, 10, new MatchAllDocsQuery(), fsp,
+                             new CategoryPath("Author", "Lisa"),
+                             new CategoryPath("Publish Date", "2010"));
+    assertEquals(2, r.drillDownResults.size());
+    assertEquals("Publish Date: 2010=1", toString(r.drillDownResults.get(0)));
+    assertEquals("Author: Lisa=1", toString(r.drillDownResults.get(1)));
+    assertEquals(2, r.drillSidewaysResults.size());
+    // Only Bob also had Publish Date=2010
+    assertEquals("Author: Bob=1", toString(r.drillSidewaysResults.get(0)));
+    // Lisa only had 2012 as the other Publish Date:
+    assertEquals("Publish Date: 2012=1", toString(r.drillSidewaysResults.get(1)));
+
+    searcher.getIndexReader().close();
+    taxoReader.close();
+    dir.close();
+    taxoDir.close();
+  }
+
+  /**
+   * Renders a facet result as {@code "dim: value=count value=count"}, using
+   * the second component of each child label and its value cast to int.
+   */
+  private String toString(FacetResult fr) {
+    StringBuilder b = new StringBuilder();
+    FacetResultNode node = fr.getFacetResultNode();
+    b.append(node.label);
+    b.append(":");
+    for(FacetResultNode childNode : node.subResults) {
+      b.append(' ');
+      b.append(childNode.label.components[1]);
+      b.append('=');
+      b.append((int) childNode.value);
+    }
+    return b.toString();
+  }
+}
+

Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (working copy)
@@ -0,0 +1,226 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; + +// nocommit we need per-dim control of AND vs OR, eg maybe i +// drill down on Price < 100 AND (Brand = 'Sony' OR Brand = +// 'Nikon'); really each dim should be list of CPs...? 
+ +public final class DrillSideways { + + private static class DrillSidewaysCollector extends Collector { + + private final Collector hitCollector; + private final CategoryPath[] paths; + private final FacetsCollector drillDownCollector; + private final FacetsCollector[] drillSidewaysCollectors; + + private Scorer scorer; + private Scorer mainScorer; + private Scorer[] subScorers; + + public DrillSidewaysCollector(Collector hitCollector, CategoryPath[] paths, FacetSearchParams fsp, IndexSearcher searcher, TaxonomyReader taxoReader) { + this.hitCollector = hitCollector; + this.paths = paths; + drillDownCollector = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader); + + // nocommit: we should be able to share a single + // FacetArrays across all of these + // collectors because the ords will be orthogonal + // ... problem is we'd have to determine at decode + // time whether the ord falls under the dim we must + // count for this one hit ... tricky + drillSidewaysCollectors = new FacetsCollector[paths.length]; + for(int i=0;i doc; + drillSidewaysCollectors[i].collect(doc); + break; + } + } + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + // We actually could accept docs out of order, but, we + // need to force BooleanScorer2 so that the + // sub-scorers are "on" each docID we are collecting: + return false; + } + + @Override + public void setNextReader(AtomicReaderContext leaf) throws IOException { + hitCollector.setNextReader(leaf); + drillDownCollector.setNextReader(leaf); + for(Collector dsc : drillSidewaysCollectors) { + dsc.setNextReader(leaf); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + hitCollector.setScorer(scorer); + drillDownCollector.setScorer(scorer); + for(Collector dsc : drillSidewaysCollectors) { + dsc.setScorer(scorer); + } + } + + public void setScorers(Scorer mainScorer, List subScorersIn) { + this.mainScorer = mainScorer; + 
//System.out.println("mainScorer=" + mainScorer); + subScorers = new Scorer[subScorersIn.size()]; + for(int i=0;i