Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1445061) +++ lucene/CHANGES.txt (working copy) @@ -72,6 +72,10 @@ * LUCENE-4759: remove FacetRequest.SortBy; result categories are always sorted by value, while ties are broken by category ordinal. (Shai Erera) +* LUCENE-4772: Facet associations moved to new FacetsAggregator API. You + should override FacetsAccumulator and return the relevant aggregator, + for aggregating the association values. (Shai Erera) + Optimizations * LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate Index: lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java =================================================================== --- lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java (revision 1445061) +++ lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java (working copy) @@ -1,7 +1,9 @@ package org.apache.lucene.demo.facet; import java.io.IOException; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; @@ -12,12 +14,15 @@ import org.apache.lucene.facet.associations.CategoryAssociationsContainer; import org.apache.lucene.facet.associations.CategoryFloatAssociation; import org.apache.lucene.facet.associations.CategoryIntAssociation; +import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator; +import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator; +import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FacetsAggregator; import 
org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; @@ -122,43 +127,29 @@ taxoWriter.close(); } - /** User runs a query and aggregates facets by summing their int associations. */ - private List sumIntAssociations() throws IOException { + /** User runs a query and aggregates facets by summing their association values. */ + private List sumAssociations() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - - // sum the 'tags' dimension - FacetSearchParams fsp = new FacetSearchParams(new AssociationIntSumFacetRequest(new CategoryPath("tags"), 10)); - - FacetsAccumulator fa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader); - FacetsCollector fc = FacetsCollector.create(fa); - - // MatchAllDocsQuery is for "browsing" (counts facets - // for all non-deleted docs in the index); normally - // you'd use a "normal" query, and use MultiCollector to - // wrap collecting the "normal" hits and also facets: - searcher.search(new MatchAllDocsQuery(), fc); - - // Retrieve results - List facetResults = fc.getFacetResults(); - indexReader.close(); - taxoReader.close(); - - return facetResults; - } + CategoryPath tags = new CategoryPath("tags"); + CategoryPath genre = new CategoryPath("genre"); + FacetSearchParams fsp = new FacetSearchParams( + new AssociationIntSumFacetRequest(tags, 10), + new AssociationFloatSumFacetRequest(genre, 10)); - /** User runs a query and aggregates facets by summing their float associations. 
*/ - private List sumFloatAssociations() throws IOException { - DirectoryReader indexReader = DirectoryReader.open(indexDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - - // sum the 'tags' dimension - FacetSearchParams fsp = new FacetSearchParams(new AssociationFloatSumFacetRequest(new CategoryPath("genre"), 10)); - - FacetsAccumulator fa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader); + // every category has a different type of association, so chain their + // respective aggregators. + final Map aggregators = new HashMap(); + aggregators.put(tags, new SumIntAssociationFacetsAggregator()); + aggregators.put(genre, new SumFloatAssociationFacetsAggregator()); + FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) { + @Override + public FacetsAggregator getAggregator() { + return new MultiAssociationsFacetsAggregator(aggregators); + } + }; FacetsCollector fc = FacetsCollector.create(fa); // MatchAllDocsQuery is for "browsing" (counts facets @@ -176,34 +167,20 @@ return facetResults; } - /** Runs summing int association example. */ - public List runSumIntAssociations() throws IOException { + /** Runs summing association example. */ + public List runSumAssociations() throws IOException { index(); - return sumIntAssociations(); + return sumAssociations(); } - /** Runs summing float association example. */ - public List runSumFloatAssociations() throws IOException { - index(); - return sumFloatAssociations(); - } - /** Runs the sum int/float associations examples and prints the results. 
*/ public static void main(String[] args) throws Exception { - System.out.println("Sum int-associations example:"); - System.out.println("-----------------------------"); - List results = new AssociationsFacetsExample().runSumIntAssociations(); + System.out.println("Sum associations example:"); + System.out.println("-------------------------"); + List results = new AssociationsFacetsExample().runSumAssociations(); for (FacetResult res : results) { System.out.println(res); } - - System.out.println("\n"); - System.out.println("Sum float-associations example:"); - System.out.println("-------------------------------"); - results = new AssociationsFacetsExample().runSumFloatAssociations(); - for (FacetResult res : results) { - System.out.println(res); - } } } Index: lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java =================================================================== --- lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java (revision 1445061) +++ lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java (working copy) @@ -31,20 +31,25 @@ @Test public void testExamples() throws Exception { - assertExampleResult(new AssociationsFacetsExample().runSumIntAssociations(), EXPECTED_INT_SUM_RESULTS); - assertExampleResult(new AssociationsFacetsExample().runSumFloatAssociations(), EXPECTED_FLOAT_SUM_RESULTS); - } - - private void assertExampleResult(List res, double[] expectedResults) { - assertNotNull("Null result!", res); - assertEquals("Wrong number of results!", 1, res.size()); - assertEquals("Wrong number of facets!", 2, res.get(0).getNumValidDescendants()); + List res = new AssociationsFacetsExample().runSumAssociations(); + + assertEquals("Wrong number of results", 2, res.size()); + for (FacetResult fres : res) { + assertEquals("Wrong number of facets", 2, fres.getNumValidDescendants()); + } + Iterable it = res.get(0).getFacetResultNode().subResults; int i = 0; for 
(FacetResultNode fResNode : it) { - assertEquals("Wrong result for facet "+fResNode.label, expectedResults[i++], fResNode.value, 1E-5); + assertEquals("Wrong result for facet " + fResNode.label, EXPECTED_INT_SUM_RESULTS[i++], fResNode.value, 1E-5); } + + it = res.get(1).getFacetResultNode().subResults; + i = 0; + for (FacetResultNode fResNode : it) { + assertEquals("Wrong result for facet " + fResNode.label, EXPECTED_FLOAT_SUM_RESULTS[i++], fResNode.value, 1E-5); + } } } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumAggregator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumAggregator.java (working copy) @@ -1,82 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.collections.IntToFloatMap; -import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.search.Aggregator; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link Aggregator} which computes the weight of a category as the sum of - * the float values associated with it in the result documents. - * - * @lucene.experimental - */ -public class AssociationFloatSumAggregator implements Aggregator { - - protected final String field; - protected final float[] sumArray; - protected final FloatAssociationsIterator associations; - - public AssociationFloatSumAggregator(float[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_FIELD, sumArray); - } - - public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException { - this.field = field; - associations = new FloatAssociationsIterator(field, new CategoryFloatAssociation()); - this.sumArray = sumArray; - } - - @Override - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { - IntToFloatMap values = associations.getAssociations(docID); - if (values != null) { - for (int i = 0; i < ordinals.length; i++) { - int ord = ordinals.ints[i]; - if (values.containsKey(ord)) { - sumArray[ord] += values.get(ord); - } - } - } - } - - @Override - public boolean equals(Object obj) { - if (obj == null || obj.getClass() != this.getClass()) { - return false; - } - AssociationFloatSumAggregator that = (AssociationFloatSumAggregator) obj; - return that.field.equals(field) && that.sumArray == sumArray; - } - - @Override - public int hashCode() { - return field.hashCode(); - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return associations.setNextReader(context); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java (revision 1445061) 
+++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java (working copy) @@ -1,12 +1,8 @@ package org.apache.lucene.facet.associations; -import java.io.IOException; - -import org.apache.lucene.facet.search.Aggregator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -27,10 +23,7 @@ /** * A {@link FacetRequest} for weighting facets according to their float - * association by summing the association values. Note that this class caches - * the associations data in-memory by default. You can override - * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an - * {@link AssociationFloatSumAggregator} which does otherwise. + * association by summing the association values. * * @lucene.experimental */ @@ -45,13 +38,6 @@ } @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException { - assert !useComplements : "complements are not supported by this FacetRequest"; - return new AssociationFloatSumAggregator(arrays.getFloatArray()); - } - - @Override public double getValueOf(FacetArrays arrays, int ordinal) { return arrays.getFloatArray()[ordinal]; } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumAggregator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumAggregator.java (working copy) @@ -1,82 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.collections.IntToIntMap; -import 
org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.search.Aggregator; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.util.IntsRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link Aggregator} which computes the weight of a category as the sum of - * the integer values associated with it in the result documents. 
- * - * @lucene.experimental - */ -public class AssociationIntSumAggregator implements Aggregator { - - protected final String field; - protected final int[] sumArray; - protected final IntAssociationsIterator associations; - - public AssociationIntSumAggregator(int[] sumArray) throws IOException { - this(CategoryListParams.DEFAULT_FIELD, sumArray); - } - - public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException { - this.field = field; - associations = new IntAssociationsIterator(field, new CategoryIntAssociation()); - this.sumArray = sumArray; - } - - @Override - public void aggregate(int docID, float score, IntsRef ordinals) throws IOException { - IntToIntMap values = associations.getAssociations(docID); - if (values != null) { - for (int i = 0; i < ordinals.length; i++) { - int ord = ordinals.ints[i]; - if (values.containsKey(ord)) { - sumArray[ord] += values.get(ord); - } - } - } - } - - @Override - public boolean equals(Object obj) { - if (obj == null || obj.getClass() != this.getClass()) { - return false; - } - AssociationIntSumAggregator that = (AssociationIntSumAggregator) obj; - return that.field.equals(field) && that.sumArray == sumArray; - } - - @Override - public int hashCode() { - return field.hashCode(); - } - - @Override - public boolean setNextReader(AtomicReaderContext context) throws IOException { - return associations.setNextReader(context); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java (working copy) @@ -1,12 +1,8 @@ package org.apache.lucene.facet.associations; -import java.io.IOException; - -import org.apache.lucene.facet.search.Aggregator; import 
org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -27,10 +23,7 @@ /** * A {@link FacetRequest} for weighting facets according to their integer - * association by summing the association values. Note that this class caches - * the associations data in-memory by default. You can override - * {@link #createAggregator(boolean, FacetArrays, TaxonomyReader)} to return an - * {@link AssociationFloatSumAggregator} which does otherwise. + * association by summing the association values. * * @lucene.experimental */ @@ -45,13 +38,6 @@ } @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException { - assert !useComplements : "complements are not supported by this FacetRequest"; - return new AssociationIntSumAggregator(arrays.getIntArray()); - } - - @Override public FacetArraysSource getFacetArraysSource() { return FacetArraysSource.INT; } Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetsAggregator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetsAggregator.java (working copy) @@ -1,47 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.search.FacetArrays; -import org.apache.lucene.facet.search.FacetRequest; -import org.apache.lucene.facet.search.FacetsAggregator; -import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; - -/* - * Licensed to the Apache Software Foundation 
(ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link FacetsAggregator} which computes the weight of a category as the sum - * of the integer values associated with it in the result documents. - */ -public class AssociationIntSumFacetsAggregator implements FacetsAggregator { - - @Override - public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, - FacetArrays facetArrays) throws IOException {} - - @Override - public void rollupValues(FacetRequest fr, int ordinal, int[] children, - int[] siblings, FacetArrays facetArrays) {} - - @Override - public boolean requiresDocScores() { - return false; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java (working copy) @@ -7,7 +7,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; -import org.apache.lucene.facet.index.CountingListBuilder; import org.apache.lucene.facet.index.DrillDownStream; 
import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.CategoryListParams; @@ -100,12 +99,7 @@ protected Map getCategoryListData(CategoryListParams categoryListParams, IntsRef ordinals, Iterable categories) throws IOException { AssociationsListBuilder associations = new AssociationsListBuilder((CategoryAssociationsContainer) categories); - CountingListBuilder counting = new CountingListBuilder(categoryListParams, indexingParams, taxonomyWriter); - // CountingListBuilder modifies the ordinals array, by e.g. adding parent ordinals, sorting etc. - // Therefore first build the associations list and only afterwards the counting list. - final Map res = associations.build(ordinals, categories); - res.putAll(counting.build(ordinals, categories)); - return res; + return associations.build(ordinals, categories); } @Override Index: lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsIterator.java (working copy) @@ -1,85 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.util.BytesRef; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An iterator over a document's category associations. - * - * @lucene.experimental - */ -public abstract class AssociationsIterator { - - private final T association; - private final String dvField; - private final BytesRef bytes = new BytesRef(32); - - private BinaryDocValues current; - - /** - * Construct a new associations iterator. The given - * {@link CategoryAssociation} is used to deserialize the association values. - * It is assumed that all association values can be deserialized with the - * given {@link CategoryAssociation}. - */ - public AssociationsIterator(String field, T association) throws IOException { - this.association = association; - this.dvField = field + association.getCategoryListID(); - } - - /** - * Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)} - * calls will be made. Returns true iff this reader has associations for any - * of the documents belonging to the association given to the constructor. - */ - public final boolean setNextReader(AtomicReaderContext context) throws IOException { - current = context.reader().getBinaryDocValues(dvField); - return current != null; - } - - /** - * Skip to the requested document. Returns true iff the document has category - * association values and they were read successfully. Associations are - * handled through {@link #handleAssociation(int, CategoryAssociation)} by - * extending classes. 
- */ - protected final boolean setNextDoc(int docID) throws IOException { - current.get(docID, bytes); - if (bytes.length == 0) { - return false; // no associations for the requested document - } - - ByteArrayDataInput in = new ByteArrayDataInput(bytes.bytes, bytes.offset, bytes.length); - while (!in.eof()) { - int ordinal = in.readInt(); - association.deserialize(in); - handleAssociation(ordinal, association); - } - return true; - } - - /** A hook for extending classes to handle the given association value for the ordinal. */ - protected abstract void handleAssociation(int ordinal, T association); - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsIterator.java (working copy) @@ -1,57 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.collections.IntToFloatMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * An {@link AssociationsIterator} over integer association values. - * - * @lucene.experimental - */ -public class FloatAssociationsIterator extends AssociationsIterator { - - private final IntToFloatMap ordinalAssociations = new IntToFloatMap(); - - /** - * Constructs a new {@link FloatAssociationsIterator}. - */ - public FloatAssociationsIterator(String field, CategoryFloatAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryFloatAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the float association values of the categories that are associated - * with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToFloatMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (revision 1445061) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsIterator.java (working copy) @@ -1,57 +0,0 @@ -package org.apache.lucene.facet.associations; - -import java.io.IOException; - -import org.apache.lucene.facet.collections.IntToIntMap; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * An {@link AssociationsIterator} over integer association values. - * - * @lucene.experimental - */ -public class IntAssociationsIterator extends AssociationsIterator { - - private final IntToIntMap ordinalAssociations = new IntToIntMap(); - - /** - * Constructs a new {@link IntAssociationsIterator}. 
- */ - public IntAssociationsIterator(String field, CategoryIntAssociation association) throws IOException { - super(field, association); - } - - @Override - protected void handleAssociation(int ordinal, CategoryIntAssociation association) { - ordinalAssociations.put(ordinal, association.getValue()); - } - - /** - * Returns the integer association values of the categories that are - * associated with the given document, or {@code null} if the document has no - * associations. - *

- * NOTE: you are not expected to modify the returned map. - */ - public IntToIntMap getAssociations(int docID) throws IOException { - ordinalAssociations.clear(); - return setNextDoc(docID) ? ordinalAssociations : null; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java (working copy) @@ -0,0 +1,92 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetsAggregator; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * A {@link FacetsAggregator} which chains multiple aggregators for aggregating + * the association values of categories that belong to the same category list. + * While nothing prevents you from chaining general purpose aggregators, it is + * only useful for aggregating association values, as each association type is + * written in its own list. + * + * @lucene.experimental + */ +public class MultiAssociationsFacetsAggregator implements FacetsAggregator { + + private final Map categoryAggregators; + private final List aggregators; + + /** + * Creates a new {@link MultiAssociationsFacetsAggregator} over the given + * aggregators. The mapping is used by + * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to + * rollup the values of the specific category by the corresponding + * {@link FacetsAggregator}. However, since each {@link FacetsAggregator} + * handles the associations of a specific type, which could cover multiple + * categories, the aggregation is done on the unique set of aggregators, which + * are identified by their class. + */ + public MultiAssociationsFacetsAggregator(Map aggregators) { + this.categoryAggregators = aggregators; + + // make sure that each FacetsAggregator class is invoked only once, or + // otherwise categories may be aggregated multiple times. 
+ Map, FacetsAggregator> aggsClasses = + new HashMap,FacetsAggregator>(); + for (FacetsAggregator fa : aggregators.values()) { + aggsClasses.put(fa.getClass(), fa); + } + this.aggregators = new ArrayList(aggsClasses.values()); + } + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + for (FacetsAggregator fa : aggregators) { + fa.aggregate(matchingDocs, clp, facetArrays); + } + } + + @Override + public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + categoryAggregators.get(fr.categoryPath).rollupValues(fr, ordinal, children, siblings, facetArrays); + } + + @Override + public boolean requiresDocScores() { + for (FacetsAggregator fa : aggregators) { + if (fa.requiresDocScores()) { + return true; + } + } + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java (working copy) @@ -0,0 +1,98 @@ +package org.apache.lucene.facet.associations; + +import java.io.IOException; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetsAggregator; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; 
+import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which computes the weight of a category as the sum + * of the float values associated with it in the result documents. Assumes that + * the association encoded for each ordinal is {@link CategoryFloatAssociation}. 
+ *
+ * @lucene.experimental
+ */
+public class SumFloatAssociationFacetsAggregator implements FacetsAggregator {
+
+  private final BytesRef bytes = new BytesRef(32); // scratch buffer, refilled for every matching document
+
+  @Override
+  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
+    BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryFloatAssociation.ASSOCIATION_LIST_ID);
+    if (dv == null) {
+      return; // no float associations in this reader
+    }
+
+    final int length = matchingDocs.bits.length();
+    final float[] values = facetArrays.getFloatArray();
+    int doc = 0;
+    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
+      dv.get(doc, bytes);
+      // a document without associations is skipped, but doc must still advance:
+      // otherwise nextSetBit(doc) keeps returning it and the loop never ends.
+      if (bytes.length > 0) {
+        // aggregate float association values for ordinals (pairs of big-endian ints)
+        int bytesUpto = bytes.offset + bytes.length;
+        int pos = bytes.offset;
+        while (pos < bytesUpto) {
+          int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+          // the association value is stored as the raw int bits of the float
+          int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+          values[ordinal] += Float.intBitsToFloat(value);
+        }
+      }
+
+      ++doc;
+    }
+  }
+
+  @Override
+  public boolean requiresDocScores() {
+    return false;
+  }
+
+  private float rollupValues(int ordinal, int[] children, int[] siblings, float[] scores) {
+    float sum = 0f; // running total over this ordinal and the siblings that follow it, children included
+    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
+      float childValue = scores[ordinal];
+      childValue += rollupValues(children[ordinal], children, siblings, scores);
+      scores[ordinal] = childValue;
+      sum += childValue;
+      ordinal = siblings[ordinal];
+    }
+    return sum;
+  }
+
+  @Override
+  public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
+    float[] values = facetArrays.getFloatArray(); // the same array aggregate() accumulates into
+    values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+  }
+
+}

Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java (working copy)
+++ lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java (working copy)
@@ -7,6 +7,9 @@
 import org.apache.lucene.facet.search.FacetRequest;
 import org.apache.lucene.facet.search.FacetsAggregator;
 import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.BytesRef;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -27,21 +30,67 @@
 
 /**
  * A {@link FacetsAggregator} which computes the weight of a category as the sum
- * of the integer values associated with it in the result documents.
+ * of the integer values associated with it in the result documents. Assumes that
+ * the association encoded for each ordinal is {@link CategoryIntAssociation}.
*/
-public class AssociationIntSumFacetsAggregator implements FacetsAggregator {
+public class SumIntAssociationFacetsAggregator implements FacetsAggregator {
+
+  private final BytesRef bytes = new BytesRef(32); // scratch buffer, refilled for every matching document
 
   @Override
-  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp,
-      FacetArrays facetArrays) throws IOException {}
-  
+  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
+    BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field + CategoryIntAssociation.ASSOCIATION_LIST_ID);
+    if (dv == null) {
+      return; // no int associations in this reader
+    }
+
+    final int length = matchingDocs.bits.length();
+    final int[] values = facetArrays.getIntArray();
+    int doc = 0;
+    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
+      dv.get(doc, bytes);
+      // a document without associations is skipped, but doc must still advance:
+      // otherwise nextSetBit(doc) keeps returning it and the loop never ends.
+      if (bytes.length > 0) {
+        // aggregate association values for ordinals (pairs of big-endian ints)
+        int bytesUpto = bytes.offset + bytes.length;
+        int pos = bytes.offset;
+        while (pos < bytesUpto) {
+          int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+          int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+              | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+          values[ordinal] += value;
+        }
+      }
+
+      ++doc;
+    }
+  }
+
   @Override
-  public void rollupValues(FacetRequest fr, int ordinal, int[] children,
-      int[] siblings, FacetArrays facetArrays) {}
-  
-  @Override
   public boolean requiresDocScores() {
     return false;
   }
-  
+
+  private int rollupValues(int ordinal, int[] children, int[] siblings, int[] values) {
+    int sum = 0; // running total over this ordinal and the siblings that follow it, children included
+    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
+      int childValue = values[ordinal];
+      childValue += rollupValues(children[ordinal], children, siblings, values);
+      values[ordinal] = childValue;
+      sum += childValue;
+      ordinal = siblings[ordinal];
+    }
+    return sum;
+  }
+
+  @Override
+  public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
+    int[] values = facetArrays.getIntArray(); // must be the int array: that is where aggregate() accumulates
+    values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+  }
+
 }
Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java (revision 1445061)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java (working copy)
@@ -36,7 +36,7 @@
  * @lucene.experimental
  */
 public abstract class FacetRequest {
-  
+
   /**
    * Result structure manner of applying request's limits such as
    * {@link FacetRequest#getNumLabel()} and {@link FacetRequest#numResults}.
@@ -45,7 +45,7 @@
   public enum ResultMode {
     /** Limits are applied per node, and the result has a full tree structure. */
     PER_NODE_IN_TREE,
-    
+
     /** Limits are applied globally, on total number of results, and the result has a flat structure. */
     GLOBAL_FLAT
   }
@@ -62,10 +62,10 @@
    * {@link FacetResultsHandler}.
    */
   public enum FacetArraysSource { INT, FLOAT, BOTH }
-  
+
   /** Requested sort order for the results. */
   public enum SortOrder { ASCENDING, DESCENDING }
-  
+
   /**
    * Default depth for facets accumulation.
    * @see #getDepth()
@@ -84,15 +84,15 @@
   private int numLabel;
   private int depth;
   private SortOrder sortOrder;
-  
+
   /**
    * Computed at construction, this hashCode is based on two final members
    * {@link CategoryPath} and numResults
    */
   private final int hashCode;
-  
+
   private ResultMode resultMode = DEFAULT_RESULT_MODE;
-  
+
   /**
    * Initialize the request with a given path, and a requested number of facets
   * results.
By default, all returned results would be labeled - to alter this @@ -123,7 +123,7 @@ hashCode = categoryPath.hashCode() ^ this.numResults; } - + /** * Create an aggregator for this facet request. Aggregator action depends on * request definition. For a count request, it will usually increment the @@ -138,9 +138,12 @@ * reader of taxonomy in effect. * @throws IOException If there is a low-level I/O error. */ - public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException; - + public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) + throws IOException { + throw new UnsupportedOperationException("this FacetRequest does not support this type of Aggregator anymore; " + + "you should override FacetsAccumulator to return the proper FacetsAggregator"); + } + @Override public boolean equals(Object o) { if (o instanceof FacetRequest) { @@ -154,7 +157,7 @@ } return false; } - + /** * How deeply to look under the given category. If the depth is 0, * only the category itself is counted. If the depth is 1, its immediate @@ -198,17 +201,17 @@ public final int getNumLabel() { return numLabel; } - + /** Return the requested result mode. */ public final ResultMode getResultMode() { return resultMode; } - + /** Return the requested order of results. */ public final SortOrder getSortOrder() { return sortOrder; } - + /** * Return the value of a category used for facets computations for this * request. For a count request this would be the count for that facet, i.e. 
@@ -232,16 +235,16 @@ // That, together with getFacetArraysSource should allow ResultHandlers to // efficiently obtain the values from the arrays directly public abstract double getValueOf(FacetArrays arrays, int idx); - + @Override public int hashCode() { return hashCode; } - + public void setDepth(int depth) { this.depth = depth; } - + public void setNumLabel(int numLabel) { this.numLabel = numLabel; } @@ -253,7 +256,7 @@ public void setResultMode(ResultMode resultMode) { this.resultMode = resultMode; } - + public void setSortOrder(SortOrder sortOrder) { this.sortOrder = sortOrder; } @@ -262,6 +265,5 @@ public String toString() { return categoryPath.toString()+" nRes="+numResults+" nLbl="+numLabel; } - + } - \ No newline at end of file Index: lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java (revision 1445061) +++ lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java (working copy) @@ -1,19 +1,17 @@ package org.apache.lucene.facet.associations; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest; -import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest; -import org.apache.lucene.facet.associations.AssociationsFacetFields; -import org.apache.lucene.facet.associations.CategoryAssociationsContainer; -import org.apache.lucene.facet.associations.CategoryFloatAssociation; -import org.apache.lucene.facet.associations.CategoryIntAssociation; import org.apache.lucene.facet.params.FacetSearchParams; import 
org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -48,7 +46,7 @@ /** Test for associations */ public class AssociationsFacetRequestTest extends FacetTestCase { - + private static Directory dir; private static IndexReader reader; private static Directory taxoDir; @@ -102,16 +100,23 @@ @Test public void testIntSumAssociation() throws Exception { DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); - + // facet requests for two facets FacetSearchParams fsp = new FacetSearchParams( new AssociationIntSumFacetRequest(aint, 10), new AssociationIntSumFacetRequest(bint, 10)); Query q = new MatchAllDocsQuery(); - - FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo); + FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { + @Override + public FacetsAggregator getAggregator() { + return new SumIntAssociationFacetsAggregator(); + } + }; + + FacetsCollector fc = FacetsCollector.create(fa); + IndexSearcher searcher = newSearcher(reader); searcher.search(q, fc); List res = fc.getFacetResults(); @@ -127,35 +132,39 @@ @Test public void testFloatSumAssociation() throws Exception { DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); - + // facet requests for two facets FacetSearchParams fsp = new FacetSearchParams( new AssociationFloatSumFacetRequest(afloat, 10), new AssociationFloatSumFacetRequest(bfloat, 10)); Query q = new MatchAllDocsQuery(); - - FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo); + FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { + @Override + public FacetsAggregator getAggregator() { + return new SumFloatAssociationFacetsAggregator(); + } + }; + + FacetsCollector fc = FacetsCollector.create(fa); + 
IndexSearcher searcher = newSearcher(reader); searcher.search(q, fc); List res = fc.getFacetResults(); assertNotNull("No results!",res); - assertEquals("Wrong number of results!",2, res.size()); + assertEquals("Wrong number of results!", 2, res.size()); assertEquals("Wrong count for category 'a'!",50f, (float) res.get(0).getFacetResultNode().value, 0.00001); assertEquals("Wrong count for category 'b'!",10f, (float) res.get(1).getFacetResultNode().value, 0.00001); taxo.close(); } - + @Test public void testDifferentAggregatorsSameCategoryList() throws Exception { - // Same category list cannot be aggregated by two different aggregators. If - // you want to do that, you need to separate the categories into two - // category list (you'll still have one association list). DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); - + // facet requests for two facets FacetSearchParams fsp = new FacetSearchParams( new AssociationIntSumFacetRequest(aint, 10), @@ -164,18 +173,33 @@ new AssociationFloatSumFacetRequest(bfloat, 10)); Query q = new MatchAllDocsQuery(); - - FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo); + final SumIntAssociationFacetsAggregator sumInt = new SumIntAssociationFacetsAggregator(); + final SumFloatAssociationFacetsAggregator sumFloat = new SumFloatAssociationFacetsAggregator(); + final Map aggregators = new HashMap(); + aggregators.put(aint, sumInt); + aggregators.put(bint, sumInt); + aggregators.put(afloat, sumFloat); + aggregators.put(bfloat, sumFloat); + FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { + @Override + public FacetsAggregator getAggregator() { + return new MultiAssociationsFacetsAggregator(aggregators); + } + }; + FacetsCollector fc = FacetsCollector.create(fa); + IndexSearcher searcher = newSearcher(reader); searcher.search(q, fc); - try { - fc.getFacetResults(); - fail("different aggregators for same category list should not be supported"); - } catch (RuntimeException e) { - // ok - 
expected - } + List res = fc.getFacetResults(); + + assertEquals("Wrong number of results!", 4, res.size()); + assertEquals("Wrong count for category 'a'!", 200, (int) res.get(0).getFacetResultNode().value); + assertEquals("Wrong count for category 'b'!", 150, (int) res.get(1).getFacetResultNode().value); + assertEquals("Wrong count for category 'a'!",50f, (float) res.get(2).getFacetResultNode().value, 0.00001); + assertEquals("Wrong count for category 'b'!",10f, (float) res.get(3).getFacetResultNode().value, 0.00001); + taxo.close(); } - + }