Index: lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicy.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicy.java (revision 1420558) +++ lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicy.java (working copy) @@ -48,6 +48,9 @@ @Override public void init(TaxonomyWriter taxonomyWriter) {} + + @Override + public String toString() { return "NO_PARENTS"; } }; /** @@ -61,14 +64,17 @@ @Override public void init(TaxonomyWriter taxonomyWriter) {} + + @Override + public String toString() { return "ALL_PARENTS"; } }; /** * Check whether a given category ordinal should be added to the stream. * * @param ordinal - * A given category ordinal which is to be tested for stream - * addition. + * A given category ordinal which is to be tested for stream + * addition. * @return true if the category should be added. * false otherwise. */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/NoParentsCountingAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/NoParentsCountingAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/NoParentsCountingAggregator.java (working copy) @@ -0,0 +1,87 @@ +package org.apache.lucene.facet.search.aggregator; + +import java.io.IOException; + +import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link Aggregator} which counts the number of occurrences of each category + * in a given set of documents. Similar to {@link CountingAggregator}, only + * counts for parents are computed on the fly (assumes + * {@link OrdinalPolicy#NO_PARENTS} was used during indexing). + * + *

+ * NOTE: this aggregator does not work with partitions, i.e. if you + * overrode {@link FacetIndexingParams#getPartitionSize()}, you should not work + * with this aggregator, because e.g. the parents of a counted category may not + * reside in the same partition. + * + * @lucene.experimental + */ +public class NoParentsCountingAggregator implements Aggregator { + + // nocommit this class does not yet check that the same parent isn't counted twice + // one way to do that is to allocate a FixedBitSet the size of counterArray and mark + // every counted ordinal, and in setNextDoc to reset the array + // another option is to look for the ordinal in the list of counted ordinals .. but + // how expensive would that be? + + protected int[] counterArray; + protected final int[] parents; + + public NoParentsCountingAggregator(TaxonomyReader taxoReader, int[] counterArray) throws IOException { + this.counterArray = counterArray; + this.parents = taxoReader.getParallelTaxonomyArrays().parents(); + } + + @Override + public void aggregate(int ordinal) { + ++counterArray[ordinal]; + int parent = parents[ordinal]; + while (parent != 0) { + ++counterArray[parent]; + parent = parents[parent]; + } + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != this.getClass()) { + return false; + } + NoParentsCountingAggregator that = (NoParentsCountingAggregator) obj; + return that.counterArray == this.counterArray; + } + + @Override + public int hashCode() { + return counterArray == null ? 0 : counterArray.hashCode(); + } + + @Override + public void setNextDoc(int docid, float score) { + // There's nothing for us to do here since we only increment the count by 1 + // in this aggregator. + // nocommit should we do something here? e.g. 
see nocommit above + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/NoParentsCountingAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java (revision 1420558) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java (working copy) @@ -1,6 +1,6 @@ package org.apache.lucene.facet.search.params; -import org.apache.lucene.index.IndexReader; +import java.io.IOException; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.aggregator.Aggregator; @@ -8,6 +8,7 @@ import org.apache.lucene.facet.search.aggregator.CountingAggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -48,8 +49,8 @@ @Override public Aggregator createAggregator(boolean useComplements, - FacetArrays arrays, IndexReader reader, - TaxonomyReader taxonomy) { + FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) + throws IOException { // we rely on that, if needed, result is cleared by arrays! 
int[] a = arrays.getIntArray(); if (useComplements) { Index: lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java (revision 1420558) +++ lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/OrdinalPolicyTest.java (working copy) @@ -1,11 +1,41 @@ package org.apache.lucene.facet.index.categorypolicy; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; + +import org.apache.lucene.document.Document; +import org.apache.lucene.facet.index.CategoryDocumentBuilder; +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.search.CategoryListIterator; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FacetsCollector; +import org.apache.lucene.facet.search.PayloadIntDecodingIterator; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.aggregator.Aggregator; +import org.apache.lucene.facet.search.aggregator.NoParentsCountingAggregator; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import 
org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.encoding.IntDecoder; import org.junit.Test; /* @@ -85,5 +115,100 @@ taxonomy.close(); dir.close(); } + + @Test + public void testNoParents() throws Exception { + final Directory indexDir = newDirectory(); + final Directory taxoDir = newDirectory(); + final IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); + final TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir); + final FacetIndexingParams fip = new DefaultFacetIndexingParams() { + @Override + protected OrdinalPolicy fixedOrdinalPolicy() { + return OrdinalPolicy.NO_PARENTS; + } + }; + final CategoryDocumentBuilder cdb = new CategoryDocumentBuilder(tw, fip); + + CategoryPath[] categories = new CategoryPath[] { + new CategoryPath( "a", "b"), + new CategoryPath( "a", "b", "1"), + new CategoryPath( "a", "b", "1"), + new CategoryPath( "a", "b", "2"), + new CategoryPath( "a", "b", "2"), + new CategoryPath( "a", "b", "3"), + new CategoryPath( "a", "b", "4"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c"), + new CategoryPath( "a", "c", "1"), + }; + + HashMap expectedCounts = new HashMap(); + for (CategoryPath cp : categories) { + iw.addDocument(cdb.setCategoryPaths(Collections.singletonList(cp)).build(new Document())); + int numComponents = cp.length(); + for (int i = 1; i <= numComponents; i++) { + String cat = cp.toString('/', i); + Integer count = expectedCounts.get(cat); + expectedCounts.put(cat, count == 
null ? Integer.valueOf(1) : Integer.valueOf(1 + count.intValue())); + } + } + IOUtils.close(iw, tw); + + final DirectoryReader ir = DirectoryReader.open(indexDir); + final TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir); + + int maxDoc = ir.maxDoc(); + IntDecoder decoder = fip.getCategoryListParams(new CategoryPath()).createEncoder().createMatchingDecoder(); + CategoryListIterator cli = new PayloadIntDecodingIterator(ir, CategoryListParams.DEFAULT_TERM, decoder); + assertTrue("failed to init", cli.init()); + for (int i = 0; i < maxDoc; i++) { + assertTrue("failed to skip to doc " + i, cli.skipTo(i)); + long ord = cli.nextCategory(); + assertTrue("unexpected EOS", ord != IntDecoder.EOS); + assertTrue("unexpected ROOT ordinal", ord != 0); + assertEquals("expected EOS - only one ordinal should have been written", IntDecoder.EOS, cli.nextCategory()); + } + + IndexSearcher searcher = new IndexSearcher(ir); + FacetSearchParams fsp = new FacetSearchParams(fip); + CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 100) { + @Override + public Aggregator createAggregator(boolean useComplements, + FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) + throws IOException { + return new NoParentsCountingAggregator(taxonomy, arrays.getIntArray()); + } + }; + cfr.setDepth(3); + cfr.setResultMode(ResultMode.GLOBAL_FLAT); // easier to compare + fsp.addFacetRequest(cfr); + FacetsCollector fc = new FacetsCollector(fsp, ir, tr) { + @Override + protected FacetsAccumulator initFacetsAccumulator( + FacetSearchParams facetSearchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); + fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); + return fa; + } + }; + searcher.search(new MatchAllDocsQuery(), fc); + FacetResult res = fc.getFacetResults().get(0); // only one request + FacetResultNode frn = res.getFacetResultNode(); + 
String label = frn.getLabel().toString('/'); + assertEquals("incorrect count for " + label, expectedCounts.get(label).intValue(), (int) frn.getValue()); + for (FacetResultNode node : frn.getSubResults()) { + label = node.getLabel().toString('/'); + assertEquals("incorrect count for " + label, expectedCounts.get(label).intValue(), (int) node.getValue()); + } + + IOUtils.close(ir, tr); + IOUtils.close(indexDir, taxoDir); + } + }