Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (revision 0) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (working copy) @@ -0,0 +1,259 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.TextField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.FacetTestUtils; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.search.CountFacetRequest; +import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; +import org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.search.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.facet.util.PrintTaxonomyStats; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; + +public class TestDrillSideways extends FacetTestCase { + + private DirectoryTaxonomyWriter taxoWriter; + private RandomIndexWriter writer; + private FacetFields facetFields; + + private void add(String ... 
categoryPaths) throws IOException { // indexes ONE new document carrying every given path; each path is '/'-delimited, e.g. "Author/Bob" + Document doc = new Document(); + List<CategoryPath> paths = new ArrayList<CategoryPath>(); + for(String categoryPath : categoryPaths) { + paths.add(new CategoryPath(categoryPath, '/')); + } + facetFields.addFields(doc, paths); + writer.addDocument(doc); + } + + public void testBasic() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("Author/Bob", "Publish Date/2010/10/15"); + add("Author/Lisa", "Publish Date/2010/10/20"); + add("Author/Lisa", "Publish Date/2012/1/1"); + add("Author/Susan", "Publish Date/2012/1/7"); + add("Author/Frank", "Publish Date/1999/5/5"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Count both "Publish Date" and "Author" dimensions, in + // drill-down: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Author"), 10)); + + // Simple case: drill-down on a single field; in this + // case the drill-sideways + drill-down counts == + // drill-down of just the query: + DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new CategoryPath("Author", "Lisa")); + DrillSidewaysResult r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + + assertEquals(2, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is only drill-down, and Lisa published + // one in 2012 and one in 2010: + 
assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published twice, and Frank/Susan/Bob + // published once: + assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1))); + + // More interesting case: drill-down on two fields + ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new CategoryPath("Author", "Lisa")); + ddq.add(new CategoryPath("Publish Date", "2010")); + r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + assertEquals(1, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is drill-sideways + drill-down: Lisa + // (drill-down) published once in 2010 and once in 2012: + assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + // Even more interesting case: drill down on two fields, + // but one of them is OR + ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + + // Drill down on Lisa or Bob: + ddq.add(new CategoryPath("Author", "Lisa"), + new CategoryPath("Author", "Bob")); + ddq.add(new CategoryPath("Publish Date", "2010")); + r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + assertEquals(2, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is both drill-sideways + drill-down: + // Lisa or Bob published twice in 2010 and once in 2012: + assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: + // only Lisa & Bob published (once each) in 2010: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + // Test drilling down on invalid field: + ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new 
CategoryPath("Foobar", "Baz")); + fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Foobar"), 10)); + r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + assertEquals(0, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // nocommit why isn't it null? should the ds dim be + // "empty FacetResult" too? + // assertNull(r.facetResults.get(0)); + assertEquals("Publish Date:", toString(r.facetResults.get(0))); + assertNull(r.facetResults.get(1)); + + // Test main query gets null scorer: + fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Author"), 10)); + ddq = new DrillDownQuery(fsp.indexingParams, new TermQuery(new Term("foobar", "baz"))); + ddq.add(new CategoryPath("Author", "Lisa")); + r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + + assertEquals(0, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // nocommit when null vs empty? + assertEquals("Publish Date:", toString(r.facetResults.get(0))); + // nocommit when null vs empty? 
+ assertEquals("Author:", toString(r.facetResults.get(1))); + + searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + public void testSometimesInvalidDrillDown() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + writer = new RandomIndexWriter(random(), dir); + + // Writes facet ords to a separate directory from the + // main index: + taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + // Reused across documents, to add the necessary facet + // fields: + facetFields = new FacetFields(taxoWriter); + + add("Author/Bob", "Publish Date/2010/10/15"); + add("Author/Lisa", "Publish Date/2010/10/20"); + writer.commit(); + // 2nd segment has no Author: + add("Foobar/Lisa", "Publish Date/2012/1/1"); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + writer.close(); + + //System.out.println("searcher=" + searcher); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + taxoWriter.close(); + + // Count both "Publish Date" and "Author" dimensions, in + // drill-down: + FacetSearchParams fsp = new FacetSearchParams( + new CountFacetRequest(new CategoryPath("Publish Date"), 10), + new CountFacetRequest(new CategoryPath("Author"), 10)); + + DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); + ddq.add(new CategoryPath("Author", "Lisa")); + DrillSidewaysResult r = DrillSideways.search(searcher, taxoReader, ddq, 10, fsp); + + assertEquals(1, r.hits.totalHits); + assertEquals(2, r.facetResults.size()); + // Publish Date is only drill-down, and the one doc + // with Author/Lisa was published in 2010: + assertEquals("Publish Date: 2010=1", toString(r.facetResults.get(0))); + // Author is drill-sideways + drill-down: Lisa + // (drill-down) published once, and Bob + // published once: + assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1))); + + 
searcher.getIndexReader().close(); + taxoReader.close(); + dir.close(); + taxoDir.close(); + } + + // nocommit make test where main query rewrites to null scorer! + + /** Formats fr (must be non-null) as the root node's label and ':' followed by " child=count" for each sub-result, where child is components[1] of the sub-result's label and count is its value cast to int. */ + private String toString(FacetResult fr) { + StringBuilder b = new StringBuilder(); + FacetResultNode node = fr.getFacetResultNode(); + b.append(node.label); + b.append(":"); + for(FacetResultNode childNode : node.subResults) { + b.append(' '); + b.append(childNode.label.components[1]); + b.append('='); + b.append((int) childNode.value); + } + return b.toString(); + } +} + Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (working copy) @@ -0,0 +1,508 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.search.CountFacetRequest; +import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer.ChildScorer; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.search.Weight; + +// nocommit explain javadocs that this allocates N facet +// arrays under the hood so if you have many ords... + +// TODO: maybe open up a way to pass "prior drill down +// results" for the newly drill down dim? 
Add serious hair +// (plus app must round-trip this in a stateless server) +// but should be good perf win ... + +/** Static-only entry point for drill-sideways search: search(...) takes a DrillDownQuery and returns a DrillSidewaysResult (facet results plus hits). + @lucene.experimental + */ +public final class DrillSideways { + + private DrillSideways() { + // Static-only class; never instantiated + } + + /** Adds one dimension of drill downs; if you pass + * multiple values they are OR'd. */ + /* + public void addDrillDown(CategoryPath... paths) { + Query q; + if (paths[0].length == 0) { + throw new IllegalArgumentException("all CategoryPaths must have length > 0"); + } + String dim = paths[0].components[0]; + if (drillDownDims.containsKey(dim)) { + throw new IllegalArgumentException("dimension '" + dim + "' was already added"); + } + if (paths.length == 1) { + q = new TermQuery(DrillDown.term(fip, paths[0])); + } else { + BooleanQuery bq = new BooleanQuery(true); // disable coord + for (CategoryPath cp : paths) { + if (cp.length == 0) { + throw new IllegalArgumentException("all CategoryPaths must have length > 0"); + } + if (!cp.components[0].equals(dim)) { + throw new IllegalArgumentException("multiple (OR'd) drill-down paths must be under same dimension; got '" + dim + "' and '" + cp.components[0] + "'"); + } + bq.add(new TermQuery(DrillDown.term(fip, cp)), Occur.SHOULD); + } + q = bq; + } + drillDownDims.put(dim, drillDownDims.size()); + drillDownQueries.add(q); + } + */ + + // nocommit also take "after"? + public static DrillSidewaysResult search(IndexSearcher searcher, TaxonomyReader taxoReader, DrillDownQuery query, + Collector hitCollector, FacetSearchParams fsp) throws IOException { + + Map drillDownDims = query.getDims(); + + // nocommit remove this limitation: it's silly? just do + // pure drill down in this case? 
+ if (drillDownDims.isEmpty()) { + throw new IllegalArgumentException("there must be at least one drill-down"); + } + + BooleanQuery ddq = query.getBooleanQuery(); + BooleanClause[] clauses = ddq.getClauses(); + + // nocommit remove this limitation (allow pure browse case): + if (clauses.length == drillDownDims.size()) { + throw new IllegalArgumentException("baseQuery must not be null"); + } + + assert clauses.length == 1+drillDownDims.size(); + + // TODO: maybe make our own minShouldMatch specialized + // to the N-1 case? just uses next() on rarest term and + // advance on the rest? + + for(int i=0;i 0"); + } + } + + final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownDims, fsp, searcher, taxoReader); + + // Disable coord (may improve perf): + BooleanQuery topQuery = new BooleanQuery(true); + + // TODO: if query is already a BQ we could copy that and + // add clauses to it, instead of doing BQ inside BQ + // (should be more efficient)? Problem is this can + // affect scoring (coord) ... too bad we can't disable + // coord on a clause by clause basis: + topQuery.add(clauses[0].getQuery(), BooleanClause.Occur.MUST); + + // NOTE: in theory we could just make a single BQ, with + // +query a b c minShouldMatch=2, but in this case BS2 + // wraps a sub-scorer that always returns 2 as the + // .freq(), not how many of the SHOULD clauses matched + + // Disable coord (may improve perf): + BooleanQuery subQuery = new BooleanQuery(true); + + Query wrappedSubQuery = new QueryWrapper(subQuery, + new SetWeight() { + @Override + public void set(Weight w) { + collector.setWeight(w, -1); + } + }); + Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery); + + // Don't impact score of original query: + constantScoreSubQuery.setBoost(0.0f); + + topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST); + + // Unfortunately I think this sub-BooleanQuery + // will never get BS1 because today BS1 only works + // if topScorer=true? 
Actually we cannot use BS1 + // anyways because we need each sub to be "on" the + // matched document in collect(): + int dimIndex = 0; + for(int i=1;i doc: "subDoc=" + subDoc + " doc=" + doc; + drillSidewaysCollectors[i].collect(doc); + assert allMatchesFrom(i+1, doc); + break; + } + } + } + } + + // Only used by assert: + private boolean allMatchesFrom(int startFrom, int doc) { + for(int i=startFrom;i facetResults; + public TopDocs hits; + + DrillSidewaysResult(List facetResults, TopDocs hits) { // package-private: stores both arguments as-is, no copying + this.facetResults = facetResults; + this.hits = hits; + } + } + + /** Wraps a Query so that the Weight created for it + * (after any rewrite) is passed to the SetWeight callback. */ + private static class QueryWrapper extends Query { + private final Query originalQuery; + private final SetWeight setter; + + public QueryWrapper(Query originalQuery, SetWeight setter) { + this.originalQuery = originalQuery; + this.setter = setter; + } + + @Override + public Weight createWeight(final IndexSearcher searcher) throws IOException { + Weight w = originalQuery.createWeight(searcher); + setter.set(w); + return w; + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query rewritten = originalQuery.rewrite(reader); + if (rewritten != originalQuery) { + return new QueryWrapper(rewritten, setter); + } else { + return this; + } + } + + @Override + public String toString(String s) { + return originalQuery.toString(s); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof QueryWrapper)) return false; + final QueryWrapper other = (QueryWrapper) o; + return super.equals(o) && originalQuery.equals(other.originalQuery); + } + + @Override + public int hashCode() { + return super.hashCode() * 31 + originalQuery.hashCode(); + } + } + + private interface SetWeight { // callback invoked by QueryWrapper.createWeight with the Weight it just created + public void set(Weight w); + } +} + Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java ___________________________________________________________________ Added: 
svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (revision 1444621) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (working copy) @@ -18,8 +18,8 @@ */ import java.io.IOException; -import java.util.HashSet; -import java.util.Set; +import java.util.LinkedHashMap; +import java.util.Map; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetIndexingParams; @@ -57,21 +57,20 @@ } private final BooleanQuery query; - private final Set drillDownDims = new HashSet(); - + private final Map drillDownDims = new LinkedHashMap(); private final FacetIndexingParams fip; /* Used by clone() */ - private DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Set drillDownDims) { + private DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map drillDownDims) { this.fip = fip; this.query = query.clone(); - this.drillDownDims.addAll(drillDownDims); + this.drillDownDims.putAll(drillDownDims); } /** - * Creates a new {@link DrillDownQuery} without a base query, which means that - * you intend to perfor a pure browsing query (equivalent to using - * {@link MatchAllDocsQuery} as base. + * Creates a new {@link DrillDownQuery} without a base query, + * to perform a pure browsing query (equivalent to using + * {@link MatchAllDocsQuery} as base). */ public DrillDownQuery(FacetIndexingParams fip) { this(fip, null); @@ -97,14 +96,14 @@ */ public void add(CategoryPath... 
paths) { Query q; + if (paths[0].length == 0) { + throw new IllegalArgumentException("all CategoryPaths must have length > 0"); + } String dim = paths[0].components[0]; - if (drillDownDims.contains(dim)) { + if (drillDownDims.containsKey(dim)) { throw new IllegalArgumentException("dimension '" + dim + "' was already added"); } if (paths.length == 1) { - if (paths[0].length == 0) { - throw new IllegalArgumentException("all CategoryPaths must have length > 0"); - } q = new TermQuery(term(fip, paths[0])); } else { BooleanQuery bq = new BooleanQuery(true); // disable coord @@ -120,7 +119,7 @@ } q = bq; } - drillDownDims.add(dim); + drillDownDims.put(dim, drillDownDims.size()); final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q); drillDownQuery.setBoost(0.0f); @@ -162,5 +161,12 @@ public String toString(String field) { return query.toString(field); } - + + BooleanQuery getBooleanQuery() { + return query; + } + + Map getDims() { + return drillDownDims; + } }