Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1443446) +++ lucene/CHANGES.txt (working copy) @@ -43,6 +43,13 @@ PathPolicy was removed, you should extend FacetFields and DrillDownStream to control which categories are added as drill-down terms. (Shai Erera) +* LUCENE-4757: Cleanup and refactoring of FacetsAccumulator, FacetRequest, + FacetsAggregator and FacetResultsHandler API. If your application did + FacetsCollector.create(), you should not be affected, but if you wrote + an Aggregator, then you should migrate it to the per-segment + FacetsAggregator. You can still use StandardFacetsAccumulator, which works + with the old API (for now). (Shai Erera) + Optimizations * LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate Index: lucene/demo/src/java/org/apache/lucene/demo/facet/adaptive/AdaptiveSearcher.java =================================================================== --- lucene/demo/src/java/org/apache/lucene/demo/facet/adaptive/AdaptiveSearcher.java (revision 1443446) +++ lucene/demo/src/java/org/apache/lucene/demo/facet/adaptive/AdaptiveSearcher.java (working copy) @@ -5,7 +5,7 @@ import org.apache.lucene.demo.facet.ExampleUtils; import org.apache.lucene.demo.facet.simple.SimpleUtils; import org.apache.lucene.facet.search.AdaptiveFacetsAccumulator; -import org.apache.lucene.facet.search.ScoredDocIdCollector; +import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; @@ -56,7 +56,7 @@ * @throws Exception on error (no detailed exception handling here for sample simplicity * @return facet results */ - public static List searchWithFacets (Directory indexDir, Directory taxoDir) throws Exception { + public static List searchWithFacets(Directory indexDir, Directory taxoDir) throws Exception { // prepare index reader and taxonomy. TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); IndexReader indexReader = DirectoryReader.open(indexDir); @@ -76,20 +76,17 @@ // regular collector for scoring matched documents TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true); - // docids collector for guiding facets accumulation (scoring disabled) - ScoredDocIdCollector docIdsCollecor = ScoredDocIdCollector.create(indexReader.maxDoc(), false); - // Faceted search parameters indicate which facets are we interested in - FacetSearchParams facetSearchParams = new FacetSearchParams( - new CountFacetRequest(new CategoryPath("root", "a"), 10)); + FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("root", "a"), 10)); + AdaptiveFacetsAccumulator accumulator = new AdaptiveFacetsAccumulator(fsp, indexReader, taxo); + FacetsCollector fc = FacetsCollector.create(accumulator); // search, into both collectors. 
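The demo hunk above shows the migration away from ScoredDocIdCollector; per the CHANGES entry, applications that already obtain their collector from FacetsCollector.create() need no changes. A minimal sketch of that default path, assuming an already-open IndexSearcher `searcher`, Query `query`, IndexReader `indexReader` and TaxonomyReader `taxoReader`:

```java
// Hedged sketch of the default faceted-search path after this patch; the
// searcher, query and reader variables are assumed to exist already.
FacetSearchParams fsp = new FacetSearchParams(
    new CountFacetRequest(new CategoryPath("root", "a"), 10));

// create() picks the most suitable accumulator for the given search params.
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(query, fc);

// One FacetResult per FacetRequest; requests whose root category is missing
// from the taxonomy yield no result.
List<FacetResult> results = fc.getFacetResults();
```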
note: in case only facets accumulation // is required, the topDocCollector part can be totally discarded - searcher.search(q, MultiCollector.wrap(topDocsCollector, docIdsCollecor)); + searcher.search(q, MultiCollector.wrap(topDocsCollector, fc)); // Obtain facets results and print them - AdaptiveFacetsAccumulator accumulator = new AdaptiveFacetsAccumulator(facetSearchParams, indexReader, taxo); - List res = accumulator.accumulate(docIdsCollecor.getScoredDocIDs()); + List res = fc.getFacetResults(); int i = 0; for (FacetResult facetResult : res) { Index: lucene/facet/src/java/org/apache/lucene/facet/associations/search/AssociationIntSumFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/associations/search/AssociationIntSumFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/associations/search/AssociationIntSumFacetsAggregator.java (working copy) @@ -0,0 +1,46 @@ +package org.apache.lucene.facet.associations.search; + +import java.io.IOException; + +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetsAggregator; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which computes the weight of a category as the sum + * of the integer values associated with it in the result documents. 
+ */ +public class AssociationIntSumFacetsAggregator implements FacetsAggregator { + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, + FacetArrays facetArrays) throws IOException {} + + @Override + public void rollupValues(int ordinal, int[] children, int[] siblings, + FacetArrays facetArrays) {} + + @Override + public boolean requiresDocScores() { + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/associations/search/AssociationIntSumFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/partitions/search/IntermediateFacetResult.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/partitions/search/IntermediateFacetResult.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/partitions/search/IntermediateFacetResult.java (working copy) @@ -0,0 +1,42 @@ +package org.apache.lucene.facet.partitions.search; + +import org.apache.lucene.facet.search.FacetResultsHandler; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.results.FacetResult; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Intermediate {@link FacetResult} of faceted search. + *

+ * This is an empty interface on purpose. + *

+ * It allows {@link FacetResultsHandler} to return intermediate result objects + * that only it knows how to interpret, and so the handler has maximal freedom + * in defining what an intermediate result is, depending on its specific logic. + * + * @lucene.experimental + */ +public interface IntermediateFacetResult { + + /** + * Facet request for which this temporary result was created. + */ + FacetRequest getFacetRequest(); + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/partitions/search/IntermediateFacetResult.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/partitions/search/PartitionsFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/partitions/search/PartitionsFacetResultsHandler.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/partitions/search/PartitionsFacetResultsHandler.java (working copy) @@ -0,0 +1,137 @@ +package org.apache.lucene.facet.partitions.search; + +import java.io.IOException; + +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.FacetResultsHandler; +import org.apache.lucene.facet.search.ScoredDocIDs; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetResultsHandler} designed to work with facet partitions. + * + * @lucene.experimental + */ +public abstract class PartitionsFacetResultsHandler extends FacetResultsHandler { + + public PartitionsFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, + FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); + } + + + /** + * Fetch results of a single partition, given facet arrays for that partition, + * and based on the matching documents and faceted search parameters. + * @param offset + * offset in input arrays where partition starts + * + * @return temporary facet result, potentially, to be passed back to + * this result handler for merging, or null in case that + * constructor parameter, facetRequest, requests an + * illegal FacetResult, like, e.g., a root node category path that + * does not exist in constructor parameter taxonomyReader + * . 
+ * @throws IOException + * on error + */ + public abstract IntermediateFacetResult fetchPartitionResult(int offset) throws IOException; + + /** + * Merge results of several facet partitions. Logic of the merge is undefined + * and open for interpretations. For example, a merge implementation could + * keep top K results. Passed {@link IntermediateFacetResult} must be ones + * that were created by this handler otherwise a {@link ClassCastException} is + * thrown. In addition, all passed {@link IntermediateFacetResult} must have + * the same {@link FacetRequest} otherwise an {@link IllegalArgumentException} + * is thrown. + * + * @param tmpResults one or more temporary results created by this + * handler. + * @return temporary facet result that represents the union, as specified by + * this handler, of the input temporary facet results. + * @throws IOException on error. + * @throws ClassCastException if the temporary result passed was not created + * by this handler + * @throws IllegalArgumentException if passed facetResults do not + * have the same {@link FacetRequest} + * @see IntermediateFacetResult#getFacetRequest() + */ + public abstract IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException; + + /** + * Create a facet result from the temporary result. + * @param tmpResult temporary result to be rendered as a {@link FacetResult} + * @throws IOException on error. + */ + public abstract FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException; + + /** + * Perform any rearrangement as required on a facet result that has changed after + * it was rendered. + *

+ * Possible use case: a sampling facets accumulator invoked another + * facets accumulator on a sample set of documents, obtained + * rendered facet results, fixed their counts, and now it is needed + * to sort the results differently according to the fixed counts. + * @param facetResult result to be rearranged. + * @see FacetResultNode#value + */ + public abstract FacetResult rearrangeFacetResult(FacetResult facetResult); + + /** + * Label results according to settings in {@link FacetRequest}, such as + * {@link FacetRequest#getNumLabel()}. Usually invoked by + * {@link StandardFacetsAccumulator#accumulate(ScoredDocIDs)}. + * + * @param facetResult + * facet result to be labeled. + * @throws IOException + * on error + */ + public abstract void labelResult(FacetResult facetResult) throws IOException; + + /** + * Check if an array contains the partition which contains the ordinal + * + * @param ordinal + * checked facet + * @param facetArrays + * facet arrays for the certain partition + * @param offset + * offset in input arrays where partition starts + */ + protected boolean isSelfPartition(int ordinal, FacetArrays facetArrays, int offset) { + int partitionSize = facetArrays.arrayLength; + return ordinal / partitionSize == offset / partitionSize; + } + + @Override + public final FacetResult compute() throws IOException { + FacetResult res = renderFacetResult(fetchPartitionResult(0)); + labelResult(res); + return res; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/partitions/search/PartitionsFacetResultsHandler.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (working copy) @@ -74,7 +74,7 @@ @Override public List accumulate(ScoredDocIDs docids) throws IOException { - FacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids); + StandardFacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids); if (delegee == this) { return super.accumulate(docids); @@ -87,7 +87,7 @@ * Compute the appropriate facet accumulator to use. * If no special/clever adaptation is possible/needed return this (self). */ - private FacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) { + private StandardFacetsAccumulator appropriateFacetCountingAccumulator(ScoredDocIDs docids) { // Verify that searchPareams permit sampling/complement/etc...
otherwise do default if (!mayComplement()) { return this; Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (working copy) @@ -0,0 +1,83 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which counts the number of times each category + * appears in the given set of documents. This aggregator uses the + * {@link CategoryListIterator} to read the encoded categories. If you used the + * default settings while indexing, you can use + * {@link FastCountingFacetsAggregator} for better performance.
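The aggregator declared just below is also the piece a custom accumulator would plug in. A hedged sketch of forcing this CategoryListIterator-based implementation, for example when the index does not use the default DGap+VInt encoding; `fsp`, `indexReader` and `taxoReader` are assumed to be set up already:

```java
// Hedged sketch: overriding FacetsAccumulator.getAggregator() so that the
// CategoryListIterator-based counting aggregator is always used.
FacetsAccumulator accumulator = new FacetsAccumulator(fsp, indexReader, taxoReader) {
  @Override
  public FacetsAggregator getAggregator() {
    return new CountingFacetsAggregator();
  }
};
FacetsCollector fc = FacetsCollector.create(accumulator);
```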
+ * + * @lucene.experimental + */ +public class CountingFacetsAggregator implements FacetsAggregator { + + private final IntsRef ordinals = new IntsRef(32); + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + final CategoryListIterator cli = clp.createCategoryListIterator(0); + if (!cli.setNextReader(matchingDocs.context)) { + return; + } + + final int length = matchingDocs.bits.length(); + final int[] counts = facetArrays.getIntArray(); + int doc = 0; + while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + cli.getOrdinals(doc, ordinals); + final int upto = ordinals.offset + ordinals.length; + for (int i = ordinals.offset; i < upto; i++) { + ++counts[ordinals.ints[i]]; + } + ++doc; + } + } + + private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { + int count = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int childCount = counts[ordinal]; + childCount += rollupCounts(children[ordinal], children, siblings, counts); + counts[ordinal] = childCount; + count += childCount; + ordinal = siblings[ordinal]; + } + return count; + } + + @Override + public void rollupValues(int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + final int[] counts = facetArrays.getIntArray(); + counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); + } + + @Override + public final boolean requiresDocScores() { + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (working copy) @@ -1,363 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map.Entry; - -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.params.CountFacetRequest; -import org.apache.lucene.facet.search.params.FacetRequest; -import org.apache.lucene.facet.search.params.FacetRequest.SortBy; -import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; -import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.DocValues.Source; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FixedBitSet; -import 
org.apache.lucene.util.PriorityQueue; -import org.apache.lucene.util.encoding.DGapVInt8IntDecoder; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link Collector} which counts facets associated with matching documents. - * This {@link Collector} can be used only in the following conditions: - *

  • All {@link FacetRequest requests} must be {@link CountFacetRequest}, with - * their {@link FacetRequest#getDepth() depth} equals to 1, and - * {@link FacetRequest#getNumLabel()} must be ≥ than - * {@link FacetRequest#getNumResults()}. Also, their sorting options must be - * {@link SortOrder#DESCENDING} and {@link SortBy#VALUE} (although ties are - * broken by ordinals). - *
  • Partitions should be disabled ( - * {@link FacetIndexingParams#getPartitionSize()} should return - * Integer.MAX_VALUE). - *
  • There can be only one {@link CategoryListParams} in the - * {@link FacetIndexingParams}, with {@link DGapVInt8IntDecoder}. - *
- * - *

- * NOTE: this colletro uses {@link DocValues#getSource()} by default, - * which pre-loads the values into memory. If your application cannot afford the - * RAM, you should use - * {@link #CountingFacetsCollector(FacetSearchParams, TaxonomyReader, FacetArrays, boolean)} - * and specify to use a direct source (corresponds to - * {@link DocValues#getDirectSource()}). - * - *

- * NOTE: this collector supports category lists that were indexed with - * {@link OrdinalPolicy#NO_PARENTS}, by counting up the parents too, after - * resolving the leafs counts. Note though that it is your responsibility to - * guarantee that indeed a document wasn't indexed with two categories that - * share a common parent, or otherwise the parent's count will be wrong. - * - * @lucene.experimental - */ -public class CountingFacetsCollector extends FacetsCollector { - - private final FacetSearchParams fsp; - private final CategoryListParams clp; - private final TaxonomyReader taxoReader; - private final BytesRef buf = new BytesRef(32); - private final FacetArrays facetArrays; - private final int[] counts; - private final String facetsField; - private final boolean useDirectSource; - private final HashMap matchingDocs = new HashMap(); - - private DocValues facetsValues; - private FixedBitSet bits; - - public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) { - this(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), false); - } - - public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader, FacetArrays facetArrays, - boolean useDirectSource) { - assert facetArrays.arrayLength >= taxoReader.getSize() : "too small facet array"; - assert assertParams(fsp) == null : assertParams(fsp); - - this.fsp = fsp; - this.clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath); - this.facetsField = clp.field; - this.taxoReader = taxoReader; - this.facetArrays = facetArrays; - this.counts = facetArrays.getIntArray(); - this.useDirectSource = useDirectSource; - } - - /** - * Asserts that this {@link FacetsCollector} can handle the given - * {@link FacetSearchParams}. Returns {@code null} if true, otherwise an error - * message. 
- */ - static String assertParams(FacetSearchParams fsp) { - // verify that all facet requests are CountFacetRequest - for (FacetRequest fr : fsp.facetRequests) { - if (!(fr instanceof CountFacetRequest)) { - return "all FacetRequests must be CountFacetRequest"; - } - if (fr.getDepth() != 1) { - return "all requests must be of depth 1"; - } - if (fr.getNumLabel() < fr.getNumResults()) { - return "this Collector always labels all requested results"; - } - if (fr.getSortOrder() != SortOrder.DESCENDING) { - return "this Collector always sorts results in descending order"; - } - if (fr.getSortBy() != SortBy.VALUE) { - return "this Collector always sorts by results' values"; - } - } - - // verify that there's only one CategoryListParams for all FacetRequests - CategoryListParams clp = null; - for (FacetRequest fr : fsp.facetRequests) { - CategoryListParams cpclp = fsp.indexingParams.getCategoryListParams(fr.categoryPath); - if (clp == null) { - clp = cpclp; - } else if (clp != cpclp) { - return "all FacetRequests must belong to the same CategoryListParams"; - } - } - if (clp == null) { - return "at least one FacetRequest must be defined"; - } - - // verify DGapVInt decoder - if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) { - return "this Collector supports only DGap + VInt encoding"; - } - - // verify that partitions are disabled - if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) { - return "this Collector does not support partitions"; - } - - return null; - } - - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - facetsValues = context.reader().docValues(facetsField); - if (facetsValues != null) { - Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource(); - bits = new FixedBitSet(context.reader().maxDoc()); - matchingDocs.put(facetSource, bits); - } - } - - @Override - public void collect(int doc) throws IOException { - if (facetsValues == null) { - return; - } - - bits.set(doc); - } - - private void countFacets() { - for (Entry entry : matchingDocs.entrySet()) { - Source facetsSource = entry.getKey(); - FixedBitSet bits = entry.getValue(); - int doc = 0; - int length = bits.length(); - while (doc < length && (doc = bits.nextSetBit(doc)) != -1) { - facetsSource .getBytes(doc, buf); - if (buf.length > 0) { - // this document has facets - int upto = buf.offset + buf.length; - int ord = 0; - int offset = buf.offset; - int prev = 0; - while (offset < upto) { - byte b = buf.bytes[offset++]; - if (b >= 0) { - prev = ord = ((ord << 7) | b) + prev; - counts[ord]++; - ord = 0; - } else { - ord = (ord << 7) | (b & 0x7F); - } - } - } - ++doc; - } - } - } - - /** - * Computes the counts of ordinals under the given ordinal's tree, by - * recursively going down to leaf nodes and rollin up their counts (called - * only with categories are indexing with OrdinalPolicy.NO_PARENTS). 
- */ - private int rollupCounts(int ordinal, int[] children, int[] siblings) { - int count = 0; - while (ordinal != TaxonomyReader.INVALID_ORDINAL) { - int childCount = counts[ordinal]; - childCount += rollupCounts(children[ordinal], children, siblings); - counts[ordinal] = childCount; - count += childCount; - ordinal = siblings[ordinal]; - } - return count; - } - - @Override - public synchronized List getFacetResults() throws IOException { - try { - // first, count matching documents' facets - countFacets(); - - ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays(); - - // compute top-K - final int[] children = arrays.children(); - final int[] siblings = arrays.siblings(); - List res = new ArrayList(); - for (FacetRequest fr : fsp.facetRequests) { - int rootOrd = taxoReader.getOrdinal(fr.categoryPath); - if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist - continue; - } - OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]); - if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { - // need to count parents - counts[rootOrd] += rollupCounts(children[rootOrd], children, siblings); - } - - FacetResultNode root = new FacetResultNode(); - root.ordinal = rootOrd; - root.label = fr.categoryPath; - root.value = counts[rootOrd]; - if (fr.getNumResults() > taxoReader.getSize()) { - // specialize this case, user is interested in all available results - ArrayList nodes = new ArrayList(); - int child = children[rootOrd]; - while (child != TaxonomyReader.INVALID_ORDINAL) { - int count = counts[child]; - if (count > 0) { - FacetResultNode node = new FacetResultNode(); - node.label = taxoReader.getPath(child); - node.value = count; - nodes.add(node); - } - child = siblings[child]; - } - Collections.sort(nodes, new Comparator() { - @Override - public int compare(FacetResultNode o1, FacetResultNode o2) { - int value = (int) (o2.value - o1.value); - if (value == 0) { - value = o2.ordinal - o1.ordinal; - } - return value; - } - }); - - root.subResults = nodes; - res.add(new FacetResult(fr, root, nodes.size())); - continue; - } - - // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big - FacetResultNodeQueue pq = new FacetResultNodeQueue(fr.getNumResults(), true); - FacetResultNode top = pq.top(); - int child = children[rootOrd]; - int numResults = 0; // count the number of results - while (child != TaxonomyReader.INVALID_ORDINAL) { - int count = counts[child]; - if (count > top.value) { - top.value = count; - top.ordinal = child; - top = pq.updateTop(); - ++numResults; - } - child = siblings[child]; - } - - // pop() the least (sentinel) elements - int pqsize = pq.size(); - int size = numResults < pqsize ? numResults : pqsize; - for (int i = pqsize - size; i > 0; i--) { pq.pop(); } - - // create the FacetResultNodes. - FacetResultNode[] subResults = new FacetResultNode[size]; - for (int i = size - 1; i >= 0; i--) { - FacetResultNode node = pq.pop(); - node.label = taxoReader.getPath(node.ordinal); - subResults[i] = node; - } - root.subResults = Arrays.asList(subResults); - res.add(new FacetResult(fr, root, size)); - } - return res; - } finally { - facetArrays.free(); - } - } - - @Override - public boolean acceptsDocsOutOfOrder() { - // the actual work is done post-collection, so we always support out-of-order. 
- return true; - } - - @Override - public void setScorer(Scorer scorer) throws IOException { - } - - // TODO: review ResultSortUtils queues and check if we can reuse any of them here - // and then alleviate the SortOrder/SortBy constraint - private static class FacetResultNodeQueue extends PriorityQueue { - - public FacetResultNodeQueue(int maxSize, boolean prepopulate) { - super(maxSize, prepopulate); - } - - @Override - protected FacetResultNode getSentinelObject() { - return new FacetResultNode(); - } - - @Override - protected boolean lessThan(FacetResultNode a, FacetResultNode b) { - if (a.value < b.value) return true; - if (a.value > b.value) return false; - // both have the same value, break tie by ordinal - return a.ordinal < b.ordinal; - } - - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java (working copy) @@ -0,0 +1,144 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; + +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest.SortBy; +import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; +import org.apache.lucene.util.PriorityQueue; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetResultsHandler} which counts the top-K facets at depth 1 only + * and always labels all result categories. The results are always sorted by + * value, in descending order. Sub-classes are responsible to pull the values + * from the corresponding {@link FacetArrays}. 
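Before the abstract class itself, declared just below, here is a hedged sketch of what a concrete subclass pulling int values might look like. The patch routes this through IntFacetResultsHandler, whose source is not part of this diff, so the class below is purely illustrative and modeled on the counting logic elsewhere in this patch:

```java
import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.facet.search.DepthOneFacetResultsHandler;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.PriorityQueue;

// Illustrative subclass reading int values out of the FacetArrays; not the
// actual IntFacetResultsHandler from the patch.
public class SimpleIntResultsHandler extends DepthOneFacetResultsHandler {

  private final int[] values;

  public SimpleIntResultsHandler(TaxonomyReader taxoReader, FacetRequest request, FacetArrays arrays) {
    super(taxoReader, request, arrays);
    this.values = arrays.getIntArray();
  }

  @Override
  protected double valueOf(int ordinal) {
    return values[ordinal];
  }

  @Override
  protected void addSiblings(int ordinal, int[] siblings, ArrayList<FacetResultNode> nodes) throws IOException {
    // collect (and label) every sibling with a non-zero value
    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
      int value = values[ordinal];
      if (value > 0) {
        FacetResultNode node = new FacetResultNode();
        node.ordinal = ordinal;
        node.label = taxonomyReader.getPath(ordinal);
        node.value = value;
        nodes.add(node);
      }
      ordinal = siblings[ordinal];
    }
  }

  @Override
  protected int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) {
    // the queue is pre-filled with sentinels; only replace the current top
    FacetResultNode top = pq.top();
    int numResults = 0;
    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
      int value = values[ordinal];
      if (value > top.value) {
        top.value = value;
        top.ordinal = ordinal;
        top = pq.updateTop();
        ++numResults;
      }
      ordinal = siblings[ordinal];
    }
    return numResults;
  }
}
```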
+ * + * @lucene.experimental + */ +public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler { + + private static class FacetResultNodeQueue extends PriorityQueue { + + public FacetResultNodeQueue(int maxSize, boolean prepopulate) { + super(maxSize, prepopulate); + } + + @Override + protected FacetResultNode getSentinelObject() { + return new FacetResultNode(); + } + + @Override + protected boolean lessThan(FacetResultNode a, FacetResultNode b) { + if (a.value < b.value) return true; + if (a.value > b.value) return false; + // both have the same value, break tie by ordinal + return a.ordinal < b.ordinal; + } + + } + + public DepthOneFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); + assert facetRequest.getDepth() == 1 : "this handler only computes the top-K facets at depth 1"; + assert facetRequest.numResults == facetRequest.getNumLabel() : "this handler always labels all top-K results"; + assert facetRequest.getSortOrder() == SortOrder.DESCENDING : "this handler always sorts results in descending order"; + assert facetRequest.getSortBy() == SortBy.VALUE : "this handler always sorts results by value"; + } + + /** Returns the value of the requested ordinal. Called once for the result root. */ + protected abstract double valueOf(int ordinal); + + /** + * Add the siblings of {@code ordinal} to the given list. This is called + * whenever the number of results is too high (> taxonomy size), instead of + * adding them to a {@link PriorityQueue}. + */ + protected abstract void addSiblings(int ordinal, int[] siblings, ArrayList nodes) throws IOException; + + /** + * Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The + * given {@link PriorityQueue} is already filled with sentinel objects, so + * implementations are encouraged to use {@link PriorityQueue#top()} and + * {@link PriorityQueue#updateTop()} for best performance. + */ + protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue pq); + + @Override + public final FacetResult compute() throws IOException { + ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays(); + final int[] children = arrays.children(); + final int[] siblings = arrays.siblings(); + + int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath); + + FacetResultNode root = new FacetResultNode(); + root.ordinal = rootOrd; + root.label = facetRequest.categoryPath; + root.value = valueOf(rootOrd); + if (facetRequest.numResults > taxonomyReader.getSize()) { + // specialize this case, user is interested in all available results + ArrayList nodes = new ArrayList(); + int child = children[rootOrd]; + addSiblings(child, siblings, nodes); + Collections.sort(nodes, new Comparator() { + @Override + public int compare(FacetResultNode o1, FacetResultNode o2) { + int value = (int) (o2.value - o1.value); + if (value == 0) { + value = o2.ordinal - o1.ordinal; + } + return value; + } + }); + + root.subResults = nodes; + return new FacetResult(facetRequest, root, nodes.size()); + } + + // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big + PriorityQueue pq = new FacetResultNodeQueue(facetRequest.numResults, true); + int numResults = addSiblings(children[rootOrd], siblings, pq); + + // pop() the least (sentinel) elements + int pqsize = pq.size(); + int size = numResults < pqsize ?
numResults : pqsize; + for (int i = pqsize - size; i > 0; i--) { pq.pop(); } + + // create the FacetResultNodes. + FacetResultNode[] subResults = new FacetResultNode[size]; + for (int i = size - 1; i >= 0; i--) { + FacetResultNode node = pq.pop(); + node.label = taxonomyReader.getPath(node.ordinal); + subResults[i] = node; + } + root.subResults = Arrays.asList(subResults); + return new FacetResult(facetRequest, root, size); + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/DocValuesCategoryListIterator.java (working copy) @@ -90,6 +90,8 @@ @Override public void getOrdinals(int docID, IntsRef ints) throws IOException { + assert current != null : "don't call this if setNextReader returned false"; + current.getBytes(docID, bytes); ints.length = 0; if (bytes.length > 0) { Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultsHandler.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultsHandler.java (working copy) @@ -4,8 +4,6 @@ import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; /* @@ -26,136 +24,25 @@ */ /** - * Handler for facet results. - *

- * The facet results handler provided by the {@link FacetRequest} to - * a {@link FacetsAccumulator}. - *

- * First it is used by {@link FacetsAccumulator} to obtain a temporary - * facet result for each partition and to merge results of several partitions. - *

- * Later the accumulator invokes the handler to render the results, creating - * {@link FacetResult} objects. - *

- * Last the accumulator invokes the handler to label final results. + * Computes the top categories for a given {@link FacetRequest}. * * @lucene.experimental */ public abstract class FacetResultsHandler { - /** Taxonomy for which facets are handled */ - protected final TaxonomyReader taxonomyReader; + public final TaxonomyReader taxonomyReader; - /** - * Facet request served by this handler. - */ - protected final FacetRequest facetRequest; + public final FacetRequest facetRequest; + + protected final FacetArrays facetArrays; - /** - * Create a faceted search handler. - * @param taxonomyReader See {@link #getTaxonomyReader()}. - * @param facetRequest See {@link #getFacetRequest()}. - */ - public FacetResultsHandler(TaxonomyReader taxonomyReader, - FacetRequest facetRequest) { + public FacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { this.taxonomyReader = taxonomyReader; this.facetRequest = facetRequest; + this.facetArrays = facetArrays; } - /** - * Fetch results of a single partition, given facet arrays for that partition, - * and based on the matching documents and faceted search parameters. - * - * @param arrays - * facet arrays for the certain partition - * @param offset - * offset in input arrays where partition starts - * @return temporary facet result, potentially, to be passed back to - * this result handler for merging, or null in case that - * constructor parameter, facetRequest, requests an - * illegal FacetResult, like, e.g., a root node category path that - * does not exist in constructor parameter taxonomyReader - * . - * @throws IOException - * on error - */ - public abstract IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException; - - /** - * Merge results of several facet partitions. Logic of the merge is undefined - * and open for interpretations. For example, a merge implementation could - * keep top K results. Passed {@link IntermediateFacetResult} must be ones - * that were created by this handler otherwise a {@link ClassCastException} is - * thrown. In addition, all passed {@link IntermediateFacetResult} must have - * the same {@link FacetRequest} otherwise an {@link IllegalArgumentException} - * is thrown. - * - * @param tmpResults one or more temporary results created by this - * handler. - * @return temporary facet result that represents to union, as specified by - * this handler, of the input temporary facet results. - * @throws IOException on error. - * @throws ClassCastException if the temporary result passed was not created - * by this handler - * @throws IllegalArgumentException if passed facetResults do not - * have the same {@link FacetRequest} - * @see IntermediateFacetResult#getFacetRequest() - */ - public abstract IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) - throws IOException, ClassCastException, IllegalArgumentException; - - /** - * Create a facet result from the temporary result. - * @param tmpResult temporary result to be rendered as a {@link FacetResult} - * @throws IOException on error. - */ - public abstract FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException ; - - /** - * Perform any rearrangement as required on a facet result that has changed after - * it was rendered. - *

- * Possible use case: a sampling facets accumulator invoked another - * other facets accumulator on a sample set of documents, obtained - * rendered facet results, fixed their counts, and now it is needed - * to sort the results differently according to the fixed counts. - * @param facetResult result to be rearranged. - * @see FacetResultNode#value - */ - public abstract FacetResult rearrangeFacetResult(FacetResult facetResult); - - /** - * Label results according to settings in {@link FacetRequest}, - * such as {@link FacetRequest#getNumLabel()}. - * Usually invoked by {@link FacetsAccumulator#accumulate(ScoredDocIDs)} - * @param facetResult facet result to be labeled. - * @throws IOException on error - */ - public abstract void labelResult (FacetResult facetResult) throws IOException; - - /** Return taxonomy reader used for current facets accumulation operation. */ - public final TaxonomyReader getTaxonomyReader() { - return this.taxonomyReader; - } - - /** Return the facet request served by this handler. */ - public final FacetRequest getFacetRequest() { - return this.facetRequest; - } - - /** - * Check if an array contains the partition which contains ordinal - * - * @param ordinal - * checked facet - * @param facetArrays - * facet arrays for the certain partition - * @param offset - * offset in input arrays where partition starts - */ - protected boolean isSelfPartition (int ordinal, FacetArrays facetArrays, int offset) { - int partitionSize = facetArrays.arrayLength; - return ordinal / partitionSize == offset / partitionSize; - } - + /** Computes the {@link FacetResult} for the given {@link FacetArrays}. */ + public abstract FacetResult compute() throws IOException; + } Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (working copy) @@ -1,14 +1,26 @@ package org.apache.lucene.facet.search; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; -import org.apache.lucene.index.IndexReader; - +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest.FacetArraysSource; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; +import org.apache.lucene.facet.search.params.FacetRequest.SortBy; +import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.encoding.DGapVInt8IntDecoder; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -33,116 +45,130 @@ * * @lucene.experimental */ -public abstract class FacetsAccumulator { +public class FacetsAccumulator { - /** - * Default threshold for using the complements 
optimization. - * If accumulating facets for a document set larger than this ratio of the index size than - * perform the complement optimization. - * @see #setComplementThreshold(double) for more info on the complements optimization. - */ - public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6; + protected final TaxonomyReader taxonomyReader; + protected final IndexReader indexReader; + protected final FacetArrays facetArrays; + protected FacetSearchParams searchParams; /** - * Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization. + * Initializes the accumulator with the given search params, index reader and + * taxonomy reader. This constructor creates the default {@link FacetArrays}, + * which do not support reuse. If you want to use {@link ReusingFacetArrays}, + * you should use the + * {@link #FacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader, FacetArrays)} + * constructor. */ - public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually - + public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + this(searchParams, indexReader, taxonomyReader, null); + } + /** - * Passing this to {@link #setComplementThreshold(double)} will force using complement optimization. + * Initializes the accumulator with the given parameters as well as + * {@link FacetArrays}. Note that the accumulator doesn't call + * {@link FacetArrays#free()}. If you require that (only makes sense if you + * use {@link ReusingFacetArrays}), you should do it after you've finished with + * the accumulator. */ - public static final double FORCE_COMPLEMENT = 0; // <=0 - - private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD; - - protected final TaxonomyReader taxonomyReader; - protected final IndexReader indexReader; - protected FacetSearchParams searchParams; - - private boolean allowLabeling = true; - - public FacetsAccumulator(FacetSearchParams searchParams, - IndexReader indexReader, - TaxonomyReader taxonomyReader) { + public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, + FacetArrays facetArrays) { + if (facetArrays == null) { + facetArrays = new FacetArrays(taxonomyReader.getSize()); + } + this.facetArrays = facetArrays; this.indexReader = indexReader; this.taxonomyReader = taxonomyReader; this.searchParams = searchParams; } - + /** - * Accumulate facets over given documents, according to facet requests in effect. - * @param docids documents (and their scores) for which facets are Accumulated. - * @return Accumulated facets. - * @throws IOException on error. + * Returns the {@link FacetsAggregator} to use for aggregating the categories + * found in the result documents. The default implementation returns + * {@link CountingFacetsAggregator}, or {@link FastCountingFacetsAggregator} + * if all categories can be decoded with {@link DGapVInt8IntDecoder}. */ - // internal API note: it was considered to move the docids into the constructor as well, - // but this prevents nice extension capabilities, especially in the way that - // Sampling Accumulator works with the (any) delegated accumulator.
- public abstract List accumulate(ScoredDocIDs docids) throws IOException; - + public FacetsAggregator getAggregator() { + if (FastCountingFacetsAggregator.verifySearchParams(searchParams)) { + return new FastCountingFacetsAggregator(); + } else { + return new CountingFacetsAggregator(); + } + } + /** - * Returns the complement threshold. - * @see #setComplementThreshold(double) + * Creates a {@link FacetResultsHandler} that matches the given + * {@link FacetRequest}. */ - public double getComplementThreshold() { - return complementThreshold; - } + protected FacetResultsHandler createFacetResultsHandler(FacetRequest fr) { + if (fr.getDepth() == 1 && fr.getSortOrder() == SortOrder.DESCENDING && fr.getSortBy() == SortBy.VALUE) { + FacetArraysSource fas = fr.getFacetArraysSource(); + if (fas == FacetArraysSource.INT) { + return new IntFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + + if (fas == FacetArraysSource.FLOAT) { + return new FloatFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + } - /** - * Set the complement threshold. - * This threshold will dictate whether the complements optimization is applied. - * The optimization is to count for less documents. It is useful when the same - * FacetSearchParams are used for varying sets of documents. The first time - * complements is used the "total counts" are computed - counting for all the - * documents in the collection. Then, only the complementing set of documents - * is considered, and used to decrement from the overall counts, thereby - * walking through less documents, which is faster. - *

- * For the default settings see {@link #DEFAULT_COMPLEMENT_THRESHOLD}. - *

- * To forcing complements in all cases pass {@link #FORCE_COMPLEMENT}. - * This is mostly useful for testing purposes, as forcing complements when only - * tiny fraction of available documents match the query does not make sense and - * would incur performance degradations. - *

- * To disable complements pass {@link #DISABLE_COMPLEMENT}. - * @param complementThreshold the complement threshold to set - * @see #getComplementThreshold() - */ - public void setComplementThreshold(double complementThreshold) { - this.complementThreshold = complementThreshold; + if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { + return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); + } + return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); } - /** - * Check if labeling is allowed for this accumulator. - *

- * By default labeling is allowed. - * This allows one accumulator to invoke other accumulators for accumulation - * but keep to itself the responsibility of labeling. - * This might br handy since labeling is a costly operation. - * @return true of labeling is allowed for this accumulator - * @see #setAllowLabeling(boolean) - */ - protected boolean isAllowLabeling() { - return allowLabeling; + protected Set getCategoryLists() { + if (searchParams.indexingParams.getAllCategoryListParams().size() == 1) { + return Collections.singleton(searchParams.indexingParams.getCategoryListParams(null)); + } + + HashSet clps = new HashSet(); + for (FacetRequest fr : searchParams.facetRequests) { + clps.add(searchParams.indexingParams.getCategoryListParams(fr.categoryPath)); + } + return clps; } /** - * Set whether labeling is allowed for this accumulator. - * @param allowLabeling new setting for allow labeling - * @see #isAllowLabeling() + * Used by {@link FacetsCollector} to build the list of {@link FacetResult + * facet results} that match the {@link FacetRequest facet requests} that were + * given in the constructor. + * + * @param matchingDocs + * the documents that matched the query, per-segment. */ - protected void setAllowLabeling(boolean allowLabeling) { - this.allowLabeling = allowLabeling; - } - - /** check if all requests are complementable */ - protected boolean mayComplement() { - for (FacetRequest freq:searchParams.facetRequests) { - if (!freq.supportsComplements()) { - return false; + public List accumulate(List matchingDocs) throws IOException { + // aggregate facets per category list (usually only one category list) + FacetsAggregator aggregator = getAggregator(); + for (CategoryListParams clp : getCategoryLists()) { + for (MatchingDocs md : matchingDocs) { + aggregator.aggregate(md, clp, facetArrays); + } + } - } - return true; + + ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays(); + + // compute top-K + final int[] children = arrays.children(); + final int[] siblings = arrays.siblings(); + List res = new ArrayList(); + for (FacetRequest fr : searchParams.facetRequests) { + int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath); + if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist + continue; + } + CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath); + OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]); + if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { + // rollup values + aggregator.rollupValues(rootOrd, children, siblings, facetArrays); + } + + FacetResultsHandler frh = createFacetResultsHandler(fr); + res.add(frh.compute()); + } + return res; } -} \ No newline at end of file + +} Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAggregator.java (working copy) @@ -0,0 +1,49 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Aggregates categories that were found in result documents (specified by + * {@link MatchingDocs}). If the aggregator requires document scores too, it + * should return {@code true} from {@link #requiresDocScores()}. + * + * @lucene.experimental + */ +public interface FacetsAggregator { + + /** Aggregate the facets found in the given matching documents. */ + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException; + + /** + * Rollup the values of the given ordinal. This method is called when a + * category was indexed with {@link OrdinalPolicy#NO_PARENTS}. The given + * ordinal is the requested category, and you should use the children and + * siblings arrays to traverse its sub-tree. + */ + public void rollupValues(int ordinal, int[] children, int[] siblings, FacetArrays facetArrays); + + /** Returns {@code true} if this aggregator requires document scores. */ + public boolean requiresDocScores(); + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java (working copy) @@ -1,15 +1,21 @@ package org.apache.lucene.facet.search; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.FixedBitSet; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -38,34 +44,199 @@ * @lucene.experimental */ public abstract class FacetsCollector extends Collector { + + private static final class DocsAndScoresCollector extends FacetsCollector { + + private AtomicReaderContext context; + private Scorer scorer; + private FixedBitSet bits; + private int totalHits; + private float[] scores; + + public DocsAndScoresCollector(FacetsAccumulator accumulator) { + super(accumulator); + } + + @Override + protected 
final void finish() { + if (bits != null) { + matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores)); + bits = null; + scores = null; + context = null; + } + } + + @Override + public final boolean acceptsDocsOutOfOrder() { + return false; + } + + @Override + public final void collect(int doc) throws IOException { + bits.set(doc); + if (totalHits >= scores.length) { + float[] newScores = new float[ArrayUtil.oversize(totalHits + 1, 4)]; + System.arraycopy(scores, 0, newScores, 0, totalHits); + scores = newScores; + } + scores[totalHits] = scorer.score(); + totalHits++; + } + + @Override + public final void setScorer(Scorer scorer) throws IOException { + this.scorer = scorer; + } + + @Override + public final void setNextReader(AtomicReaderContext context) throws IOException { + if (bits != null) { + matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores)); + } + bits = new FixedBitSet(context.reader().maxDoc()); + totalHits = 0; + scores = new float[64]; // some initial size + this.context = context; + } + + } + + private final static class DocsOnlyCollector extends FacetsCollector { + + private AtomicReaderContext context; + private FixedBitSet bits; + private int totalHits; + + public DocsOnlyCollector(FacetsAccumulator accumulator) { + super(accumulator); + } + + @Override + protected final void finish() { + if (bits != null) { + matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, null)); + bits = null; + context = null; + } + } + + @Override + public final boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public final void collect(int doc) throws IOException { + totalHits++; + bits.set(doc); + } + + @Override + public final void setScorer(Scorer scorer) throws IOException {} + + @Override + public final void setNextReader(AtomicReaderContext context) throws IOException { + if (bits != null) { + matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, null)); + } + bits = new FixedBitSet(context.reader().maxDoc()); + totalHits = 0; + this.context = context; + } + } /** - * Returns the most optimized {@link FacetsCollector} for the given search - * parameters. The returned {@link FacetsCollector} is guaranteed to satisfy - * the requested parameters. - * - * @throws IllegalArgumentException - * if there is no built-in collector that can satisfy the search - * parameters. + * Holds the documents that were matched in the {@link AtomicReaderContext}. + * If scores were required, then {@code scores} is not null. */ + public final static class MatchingDocs { + + public final AtomicReaderContext context; + public final FixedBitSet bits; + public final float[] scores; + public final int totalHits; + + public MatchingDocs(AtomicReaderContext context, FixedBitSet bits, int totalHits, float[] scores) { + this.context = context; + this.bits = bits; + this.scores = scores; + this.totalHits = totalHits; + } + } + + /** + * Creates a {@link FacetsCollector} with the default + * {@link FacetsAccumulator}. 
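For orientation, a minimal usage sketch of the collector flow described above follows. This is not code from the patch; the variables indexReader, taxoReader, searcher and query are assumed to be set up elsewhere, and the "Author" dimension is made up for the example:

    // Request the top-10 children of the hypothetical "Author" dimension.
    FacetSearchParams fsp = new FacetSearchParams(
        new CountFacetRequest(new CategoryPath("Author"), 10));

    // create() picks the new FacetsAccumulator when all requests are counting
    // requests and no partitions are used, else StandardFacetsAccumulator.
    FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);

    TopScoreDocCollector tdc = TopScoreDocCollector.create(10, true);
    searcher.search(query, MultiCollector.wrap(tdc, fc));

    // Accumulation happens lazily here, over the collected per-segment MatchingDocs.
    List<FacetResult> results = fc.getFacetResults();

    fc.reset(); // optionally reuse the same collector for the next search

Note that DocsOnlyCollector accepts out-of-order documents while DocsAndScoresCollector does not, so create() keeps that choice hidden behind the accumulator's requiresDocScores().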
+ */ public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { - if (CountingFacetsCollector.assertParams(fsp) == null) { - return new CountingFacetsCollector(fsp, taxoReader); + if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) { + return create(new StandardFacetsAccumulator(fsp, indexReader, taxoReader)); } - if (StandardFacetsCollector.assertParams(fsp) == null) { - return new StandardFacetsCollector(fsp, indexReader, taxoReader); + for (FacetRequest fr : fsp.facetRequests) { + if (!(fr instanceof CountFacetRequest)) { + return create(new StandardFacetsAccumulator(fsp, indexReader, taxoReader)); + } } - throw new IllegalArgumentException("None of the built-in FacetsCollectors can handle the given search params"); + return create(new FacetsAccumulator(fsp, indexReader, taxoReader)); } + + /** + * Creates a {@link FacetsCollector} that satisfies the requirements of the + * given {@link FacetsAccumulator}. + */ + public static FacetsCollector create(FacetsAccumulator accumulator) { + if (accumulator.getAggregator().requiresDocScores()) { + return new DocsAndScoresCollector(accumulator); + } else { + return new DocsOnlyCollector(accumulator); + } + } + + private final FacetsAccumulator accumulator; + protected final List matchingDocs = new ArrayList(); + + protected FacetsCollector(FacetsAccumulator accumulator) { + this.accumulator = accumulator; + } + /** + * Called when the Collector has finished, so that the last + * {@link MatchingDocs} can be added. + */ + protected abstract void finish(); + + /** * Returns a {@link FacetResult} per {@link FacetRequest} set in * {@link FacetSearchParams}. Note that if one of the {@link FacetRequest * requests} is for a {@link CategoryPath} that does not exist in the taxonomy, * no matching {@link FacetResult} will be returned. */ - public abstract List getFacetResults() throws IOException; + public final List getFacetResults() throws IOException { + finish(); + return accumulator.accumulate(matchingDocs); + } + + /** + * Returns the documents matched by the query, one {@link MatchingDocs} per + * visited segment. + */ + public final List getMatchingDocs() { + finish(); + return matchingDocs; + } + + /** + * Allows reusing the collector between search requests. This method simply + * clears all collected document (and score) information, and does not + * attempt to reuse allocated memory. 
+ */ + public final void reset() { + finish(); + matchingDocs.clear(); + } } Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy) @@ -0,0 +1,133 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.encoding.DGapVInt8IntDecoder; +import org.apache.lucene.util.encoding.DGapVInt8IntEncoder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which counts the number of times each category + * appears in the given set of documents. This aggregator reads the categories + * from the {@link DocValues} field defined by {@link CategoryListParams#field}, + * and assumes that the category ordinals were encoded with + * {@link DGapVInt8IntEncoder}. + * + * @lucene.experimental + */ +public final class FastCountingFacetsAggregator implements FacetsAggregator { + + private final BytesRef buf = new BytesRef(32); + + /** + * Returns {@code true} if this aggregator can handle the given + * {@link FacetSearchParams}, i.e. if all requested category lists were + * encoded with {@link DGapVInt8IntEncoder}, and {@code false} otherwise. + */ + final static boolean verifySearchParams(FacetSearchParams fsp) { + // verify that all category lists were encoded with DGapVInt + for (FacetRequest fr : fsp.facetRequests) { + CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath); + if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) { + return false; + } + } + + return true; + } + + /** + * Returns the {@link Source} for the given {@link DocValues}. By default + * returns {@link DocValues#getSource()}, but extensions can override to + * return {@link DocValues#getDirectSource()}. 
+ */ + protected Source getSource(DocValues docValues) throws IOException { + return docValues.getSource(); + } + + @Override + public final void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) + throws IOException { + assert clp.createEncoder().createMatchingDecoder().getClass() == DGapVInt8IntDecoder.class + : "this aggregator assumes ordinals were encoded as dgap+vint"; + + final DocValues dv = matchingDocs.context.reader().docValues(clp.field); + if (dv == null) { // this reader does not have DocValues for the requested category list + return; + } + + final Source facetsSource = getSource(dv); + final int length = matchingDocs.bits.length(); + final int[] counts = facetArrays.getIntArray(); + int doc = 0; + while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + facetsSource.getBytes(doc, buf); + if (buf.length > 0) { + // this document has facets + final int upto = buf.offset + buf.length; + int ord = 0; + int offset = buf.offset; + int prev = 0; + while (offset < upto) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + prev = ord = ((ord << 7) | b) + prev; + ++counts[ord]; + ord = 0; + } else { + ord = (ord << 7) | (b & 0x7F); + } + } + } + ++doc; + } + } + + private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { + int count = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int childCount = counts[ordinal]; + childCount += rollupCounts(children[ordinal], children, siblings, counts); + counts[ordinal] = childCount; + count += childCount; + ordinal = siblings[ordinal]; + } + return count; + } + + @Override + public final void rollupValues(int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + final int[] counts = facetArrays.getIntArray(); + counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); + } + + @Override + public final boolean requiresDocScores() { + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java (working copy) @@ -0,0 +1,80 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.util.PriorityQueue; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link DepthOneFacetResultsHandler} which fills the categories values from + * {@link FacetArrays#getFloatArray()}. + * + * @lucene.experimental + */ +public final class FloatFacetResultsHandler extends DepthOneFacetResultsHandler { + + private final float[] values; + + public FloatFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); + this.values = facetArrays.getFloatArray(); + } + + @Override + protected final double valueOf(int ordinal) { + return values[ordinal]; + } + + + @Override + protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue pq) { + FacetResultNode top = pq.top(); + int numResults = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + float value = values[ordinal]; + if (value > top.value) { + top.value = value; + top.ordinal = ordinal; + top = pq.updateTop(); + ++numResults; + } + ordinal = siblings[ordinal]; + } + return numResults; + } + + @Override + protected final void addSiblings(int ordinal, int[] siblings, ArrayList nodes) throws IOException { + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + float value = values[ordinal]; + if (value > 0) { + FacetResultNode node = new FacetResultNode(); + node.label = taxonomyReader.getPath(ordinal); + node.value = value; + nodes.add(node); + } + ordinal = siblings[ordinal]; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java (working copy) @@ -0,0 +1,79 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.util.PriorityQueue; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link DepthOneFacetResultsHandler} which fills the categories values from + * {@link FacetArrays#getIntArray()}. + * + * @lucene.experimental + */ +public final class IntFacetResultsHandler extends DepthOneFacetResultsHandler { + + private final int[] values; + + public IntFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); + this.values = facetArrays.getIntArray(); + } + + @Override + protected final double valueOf(int ordinal) { + return values[ordinal]; + } + + @Override + protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue pq) { + FacetResultNode top = pq.top(); + int numResults = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int value = values[ordinal]; + if (value > top.value) { + top.value = value; + top.ordinal = ordinal; + top = pq.updateTop(); + ++numResults; + } + ordinal = siblings[ordinal]; + } + return numResults; + } + + @Override + protected final void addSiblings(int ordinal, int[] siblings, ArrayList nodes) throws IOException { + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int value = values[ordinal]; + if (value > 0) { + FacetResultNode node = new FacetResultNode(); + node.label = taxonomyReader.getPath(ordinal); + node.value = value; + nodes.add(node); + } + ordinal = siblings[ordinal]; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (working copy) @@ -0,0 +1,169 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; + +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Represents {@link MatchingDocs} as {@link ScoredDocIDs}. 
+ * + * @lucene.experimental + */ +public class MatchingDocsAsScoredDocIDs implements ScoredDocIDs { + + // TODO remove this class once we get rid of ScoredDocIDs + + final List matchingDocs; + final int size; + + public MatchingDocsAsScoredDocIDs(List matchingDocs) { + this.matchingDocs = matchingDocs; + int totalSize = 0; + for (MatchingDocs md : matchingDocs) { + totalSize += md.totalHits; + } + this.size = totalSize; + } + + @Override + public ScoredDocIDsIterator iterator() throws IOException { + return new ScoredDocIDsIterator() { + + final Iterator mdIter = matchingDocs.iterator(); + + int scoresIdx = 0; + int doc = 0; + MatchingDocs current; + int currentLength; + boolean done = false; + + @Override + public boolean next() { + if (done) { + return false; + } + + while (current == null) { + if (!mdIter.hasNext()) { + done = true; + return false; + } + current = mdIter.next(); + currentLength = current.bits.length(); + doc = 0; + scoresIdx = 0; + + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + } else { + doc = -1; // we're calling nextSetBit later on + } + } + + ++doc; + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + return next(); + } + + return true; + } + + @Override + public float getScore() { + return current.scores == null ? ScoredDocIDsIterator.DEFAULT_SCORE : current.scores[scoresIdx++]; + } + + @Override + public int getDocID() { + return done ? DocIdSetIterator.NO_MORE_DOCS : doc + current.context.docBase; + } + }; + } + + @Override + public DocIdSet getDocIDs() { + return new DocIdSet() { + + final Iterator mdIter = matchingDocs.iterator(); + int doc = 0; + MatchingDocs current; + int currentLength; + boolean done = false; + + @Override + public DocIdSetIterator iterator() throws IOException { + return new DocIdSetIterator() { + + @Override + public int nextDoc() throws IOException { + if (done) { + return DocIdSetIterator.NO_MORE_DOCS; + } + + while (current == null) { + if (!mdIter.hasNext()) { + done = true; + return DocIdSetIterator.NO_MORE_DOCS; + } + current = mdIter.next(); + currentLength = current.bits.length(); + doc = 0; + + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + } else { + doc = -1; // we're calling nextSetBit later on + } + } + + ++doc; + if (doc >= currentLength || (doc = current.bits.nextSetBit(doc)) == -1) { + current = null; + return nextDoc(); + } + + return doc + current.context.docBase; + } + + @Override + public int docID() { + return doc + current.context.docBase; + } + + @Override + public int advance(int target) throws IOException { + throw new UnsupportedOperationException("not supported"); + } + }; + } + }; + } + + @Override + public int size() { + return size; + } + +} \ No newline at end of file Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java (working copy) @@ -4,6 +4,7 @@ import java.util.ArrayList; import java.util.List; +import 
org.apache.lucene.facet.partitions.search.PartitionsFacetResultsHandler; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.sampling.Sampler; @@ -35,12 +36,12 @@ * * @lucene.experimental */ -public class SamplingWrapper extends FacetsAccumulator { +public class SamplingWrapper extends StandardFacetsAccumulator { - private FacetsAccumulator delegee; + private StandardFacetsAccumulator delegee; private Sampler sampler; - public SamplingWrapper(FacetsAccumulator delegee, Sampler sampler) { + public SamplingWrapper(StandardFacetsAccumulator delegee, Sampler sampler) { super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader); this.delegee = delegee; this.sampler = sampler; @@ -48,11 +49,6 @@ @Override public List accumulate(ScoredDocIDs docids) throws IOException { - // first let delegee accumulate without labeling at all (though - // currently it doesn't matter because we have to label all returned anyhow) - boolean origAllowLabeling = isAllowLabeling(); - setAllowLabeling(false); - // Replacing the original searchParams with the over-sampled (and without statistics-compute) FacetSearchParams original = delegee.searchParams; delegee.searchParams = sampler.overSampledSearchParams(original); @@ -60,24 +56,20 @@ SampleResult sampleSet = sampler.getSampleSet(docids); List sampleRes = delegee.accumulate(sampleSet.docids); - setAllowLabeling(origAllowLabeling); List fixedRes = new ArrayList(); for (FacetResult fres : sampleRes) { // for sure fres is not null because this is guaranteed by the delegee. - FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(taxonomyReader); + PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest()); // fix the result of current request - sampler.getSampleFixer(indexReader, taxonomyReader, searchParams) - .fixResult(docids, fres); + sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres); fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any // Using the sampler to trim the extra (over-sampled) results fres = sampler.trimResult(fres); // final labeling if allowed (because labeling is a costly operation) - if (isAllowLabeling()) { - frh.labelResult(fres); - } + frh.labelResult(fres); fixedRes.add(fres); // add to final results } @@ -96,14 +88,4 @@ delegee.setComplementThreshold(complementThreshold); } - @Override - protected boolean isAllowLabeling() { - return delegee.isAllowLabeling(); - } - - @Override - protected void setAllowLabeling(boolean allowLabeling) { - delegee.setAllowLabeling(allowLabeling); - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIDs.java (working copy) @@ -25,7 +25,7 @@ * Document IDs with scores for each, driving facets accumulation. Document * scores are optionally used in the process of facets scoring. 
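For code that still depends on the ScoredDocIDs pipeline, the two APIs can be bridged; a rough sketch, with fsp, indexReader, taxoReader, searcher and query assumed as in the earlier sketch:

    // Either drive the old-API accumulator through the new collector...
    StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
    FacetsCollector fc = FacetsCollector.create(sfa);
    searcher.search(query, fc);
    List<FacetResult> results = fc.getFacetResults();

    // ...or adapt the collected per-segment documents to ScoredDocIDs directly:
    ScoredDocIDs docids = new MatchingDocsAsScoredDocIDs(fc.getMatchingDocs());
    List<FacetResult> sameResults = sfa.accumulate(docids);

Both paths end up in StandardFacetsAccumulator.accumulate(ScoredDocIDs); the first simply lets the collector do the wrapping, as its accumulate(List) override below shows.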
* - * @see FacetsAccumulator#accumulate(ScoredDocIDs) + * @see StandardFacetsAccumulator#accumulate(ScoredDocIDs) * @lucene.experimental */ public interface ScoredDocIDs { Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java (working copy) @@ -1,235 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; - -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.FixedBitSet; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link Collector} which stores all docIDs and their scores in a - * {@link ScoredDocIDs} instance. If scoring is not enabled, then the default - * score as set in {@link #setDefaultScore(float)} (or - * {@link ScoredDocIDsIterator#DEFAULT_SCORE}) will be set for all documents. - * - * @lucene.experimental - */ -public abstract class ScoredDocIdCollector extends Collector { - - private static final class NonScoringDocIdCollector extends ScoredDocIdCollector { - - float defaultScore = ScoredDocIDsIterator.DEFAULT_SCORE; - - @SuppressWarnings("synthetic-access") - public NonScoringDocIdCollector(int maxDoc) { - super(maxDoc); - } - - @Override - public boolean acceptsDocsOutOfOrder() { return true; } - - @Override - public void collect(int doc) { - docIds.set(docBase + doc); - ++numDocIds; - } - - @Override - public float getDefaultScore() { - return defaultScore; - } - - @Override - protected ScoredDocIDsIterator scoredDocIdsIterator() { - return new ScoredDocIDsIterator() { - - private DocIdSetIterator docIdsIter = docIds.iterator(); - private int nextDoc; - - @Override - public int getDocID() { return nextDoc; } - @Override - public float getScore() { return defaultScore; } - - @Override - public boolean next() { - try { - nextDoc = docIdsIter.nextDoc(); - return nextDoc != DocIdSetIterator.NO_MORE_DOCS; - } catch (IOException e) { - // This should not happen as we're iterating over an OpenBitSet. 
For - // completeness, terminate iteration - nextDoc = DocIdSetIterator.NO_MORE_DOCS; - return false; - } - } - - }; - } - - @Override - public void setDefaultScore(float defaultScore) { - this.defaultScore = defaultScore; - } - - @Override - public void setScorer(Scorer scorer) {} - } - - private static final class ScoringDocIdCollector extends ScoredDocIdCollector { - - float[] scores; - private Scorer scorer; - - @SuppressWarnings("synthetic-access") - public ScoringDocIdCollector(int maxDoc) { - super(maxDoc); - // only matching documents have an entry in the scores array. Therefore start with - // a small array and grow when needed. - scores = new float[64]; - } - - @Override - public boolean acceptsDocsOutOfOrder() { return false; } - - @Override - public void collect(int doc) throws IOException { - docIds.set(docBase + doc); - - float score = this.scorer.score(); - if (numDocIds >= scores.length) { - float[] newScores = new float[ArrayUtil.oversize(numDocIds + 1, 4)]; - System.arraycopy(scores, 0, newScores, 0, numDocIds); - scores = newScores; - } - scores[numDocIds] = score; - ++numDocIds; - } - - @Override - protected ScoredDocIDsIterator scoredDocIdsIterator() { - return new ScoredDocIDsIterator() { - - private DocIdSetIterator docIdsIter = docIds.iterator(); - private int nextDoc; - private int scoresIdx = -1; - - @Override - public int getDocID() { return nextDoc; } - @Override - public float getScore() { return scores[scoresIdx]; } - - @Override - public boolean next() { - try { - nextDoc = docIdsIter.nextDoc(); - if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) { - return false; - } - ++scoresIdx; - return true; - } catch (IOException e) { - // This should not happen as we're iterating over an OpenBitSet. For - // completeness, terminate iteration - nextDoc = DocIdSetIterator.NO_MORE_DOCS; - return false; - } - } - - }; - } - - @Override - public float getDefaultScore() { return ScoredDocIDsIterator.DEFAULT_SCORE; } - - @Override - public void setDefaultScore(float defaultScore) {} - - @Override - public void setScorer(Scorer scorer) { - this.scorer = scorer; - } - } - - protected int numDocIds; - protected int docBase; - protected final FixedBitSet docIds; - - /** - * Creates a new {@link ScoredDocIdCollector} with the given parameters. - * - * @param maxDoc the number of documents that are expected to be collected. - * Note that if more documents are collected, unexpected exceptions may - * be thrown. Usually you should pass {@link IndexReader#maxDoc()} of - * the same IndexReader with which the search is executed. - * @param enableScoring if scoring is enabled, a score will be computed for - * every matching document, which might be expensive. Therefore if you - * do not require scoring, it is better to set it to false. - */ - public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) { - return enableScoring ? new ScoringDocIdCollector(maxDoc) : new NonScoringDocIdCollector(maxDoc); - } - - private ScoredDocIdCollector(int maxDoc) { - numDocIds = 0; - docIds = new FixedBitSet(maxDoc); - } - - protected abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException; - - /** Returns the default score used when scoring is disabled. */ - public abstract float getDefaultScore(); - - /** Set the default score. Only applicable if scoring is disabled. 
*/ - public abstract void setDefaultScore(float defaultScore); - - - public ScoredDocIDs getScoredDocIDs() { - return new ScoredDocIDs() { - - @Override - public ScoredDocIDsIterator iterator() throws IOException { - return scoredDocIdsIterator(); - } - - @Override - public DocIdSet getDocIDs() { - return docIds; - } - - @Override - public int size() { - return numDocIds; - } - - }; - } - - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - this.docBase = context.docBase; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (working copy) @@ -11,11 +11,15 @@ import java.util.logging.Logger; import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.partitions.search.PartitionsFacetResultsHandler; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.search.aggregator.Aggregator; +import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; +import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; +import org.apache.lucene.facet.partitions.search.IntermediateFacetResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.util.PartitionsUtils; import org.apache.lucene.facet.util.ScoredDocIdsUtils; @@ -66,8 +70,24 @@ private static final Logger logger = Logger.getLogger(StandardFacetsAccumulator.class.getName()); - protected final FacetArrays facetArrays; + /** + * Default threshold for using the complements optimization. + * If facets are accumulated for a document set larger than this ratio of the index size, + * the complement optimization is applied. + * @see #setComplementThreshold(double) for more info on the complements optimization. + */ + public static final double DEFAULT_COMPLEMENT_THRESHOLD = 0.6; + /** + * Passing this to {@link #setComplementThreshold(double)} will disable using complement optimization. + */ + public static final double DISABLE_COMPLEMENT = Double.POSITIVE_INFINITY; // > 1 actually + + /** + * Passing this to {@link #setComplementThreshold(double)} will force using complement optimization. 
+ */ + public static final double FORCE_COMPLEMENT = 0; // <=0 + protected int partitionSize; protected int maxPartitions; protected boolean isUsingComplements; @@ -76,15 +96,18 @@ private Object accumulateGuard; + private double complementThreshold; + + public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + this(searchParams, indexReader, taxonomyReader, new FacetArrays( + PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader))); + } + public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, FacetArrays facetArrays) { - super(searchParams,indexReader,taxonomyReader); + super(searchParams, indexReader, taxonomyReader, facetArrays); - if (facetArrays == null) { - throw new IllegalArgumentException("facetArrays cannot be null"); - } - - this.facetArrays = facetArrays; // can only be computed later when docids size is known isUsingComplements = false; partitionSize = PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader); @@ -92,13 +115,7 @@ accumulateGuard = new Object(); } - public StandardFacetsAccumulator(FacetSearchParams searchParams, - IndexReader indexReader, TaxonomyReader taxonomyReader) { - this(searchParams, indexReader, taxonomyReader, new FacetArrays( - PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader))); - } - - @Override + // TODO: this should be removed once we clean the API public List accumulate(ScoredDocIDs docids) throws IOException { // synchronize to prevent calling two accumulate()'s at the same time. @@ -162,8 +179,8 @@ for (FacetRequest fr : searchParams.facetRequests) { // Handle and merge only facet requests which were not already handled. if (handledRequests.add(fr)) { - FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader); - IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, offset); + PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); + IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(offset); IntermediateFacetResult oldRes = fr2tmpRes.get(fr); if (oldRes != null) { res4fr = frHndlr.mergeResults(oldRes, res4fr); @@ -179,16 +196,14 @@ // gather results from all requests into a list for returning them List res = new ArrayList(); for (FacetRequest fr : searchParams.facetRequests) { - FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader); + PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr); IntermediateFacetResult tmpResult = fr2tmpRes.get(fr); if (tmpResult == null) { continue; // do not add a null to the list. 
} FacetResult facetRes = frHndlr.renderFacetResult(tmpResult); // final labeling if allowed (because labeling is a costly operation) - if (isAllowLabeling()) { - frHndlr.labelResult(facetRes); - } + frHndlr.labelResult(facetRes); res.add(facetRes); } @@ -196,6 +211,25 @@ } } + /** check if all requests are complementable */ + protected boolean mayComplement() { + for (FacetRequest freq : searchParams.facetRequests) { + if (!(freq instanceof CountFacetRequest)) { + return false; + } + } + return true; + } + + @Override + protected PartitionsFacetResultsHandler createFacetResultsHandler(FacetRequest fr) { + if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { + return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); + } else { + return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + } + /** * Set the actual set of documents over which accumulation should take place. *
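The complements optimization is configured through the threshold constants declared earlier and the setComplementThreshold setter added in the next hunk. A rough usage sketch, with setup variables assumed as before:

    StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);

    // Example arithmetic with the 0.6 default: on a 1,000,000-doc index, a query
    // matching 800,000 docs (ratio 0.8 > 0.6) is counted via the 200,000
    // complement docs, decremented from the precomputed total counts.
    sfa.setComplementThreshold(StandardFacetsAccumulator.DEFAULT_COMPLEMENT_THRESHOLD);

    // Or switch the optimization off / force it unconditionally:
    sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT);
    sfa.setComplementThreshold(StandardFacetsAccumulator.FORCE_COMPLEMENT);

    // Note: per mayComplement() above, complements are only attempted when
    // every request is a CountFacetRequest.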

@@ -338,4 +372,43 @@ return categoryLists; } -} \ No newline at end of file + + @Override + public List accumulate(List matchingDocs) throws IOException { + return accumulate(new MatchingDocsAsScoredDocIDs(matchingDocs)); + } + + /** + * Returns the complement threshold. + * @see #setComplementThreshold(double) + */ + public double getComplementThreshold() { + return complementThreshold; + } + + /** + * Set the complement threshold. + * This threshold dictates whether the complements optimization is applied. + * The optimization counts fewer documents. It is useful when the same + * FacetSearchParams are used for varying sets of documents. The first time + * complements are used, the "total counts" are computed by counting over all the + * documents in the collection. Then, only the complementing set of documents + * is considered, and used to decrement from the overall counts, thereby + * walking through fewer documents, which is faster. + * <p> + * For the default setting see {@link #DEFAULT_COMPLEMENT_THRESHOLD}. + * <p> + * To force complements in all cases pass {@link #FORCE_COMPLEMENT}. + * This is mostly useful for testing purposes, as forcing complements when only a + * tiny fraction of the available documents match the query does not make sense and + * would incur a performance degradation. + * <p> + * To disable complements pass {@link #DISABLE_COMPLEMENT}. + * @param complementThreshold the complement threshold to set + * @see #getComplementThreshold() + */ + public void setComplementThreshold(double complementThreshold) { + this.complementThreshold = complementThreshold; + } + +} Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java (working copy) @@ -1,151 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.List; - -import org.apache.lucene.facet.index.params.CategoryListParams; -import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; -import org.apache.lucene.facet.search.params.FacetRequest; -import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Scorer; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * A {@link FacetsCollector} which allows initilizing e.g. - * {@link FacetsAccumulator}. Supports facet partitions, generic - * {@link FacetRequest facet requests}, {@link CategoryListParams} etc. - * - *

- * NOTE: this collector, with the default {@link FacetsAccumulator} does - * not support category lists which were indexed with - * {@link OrdinalPolicy#NO_PARENTS}. - * - * @lucene.experimental - */ -public class StandardFacetsCollector extends FacetsCollector { - - protected final FacetsAccumulator facetsAccumulator; - private ScoredDocIdCollector scoreDocIdCollector; - private List results; - private Object resultsGuard; - - static String assertParams(FacetSearchParams fsp) { - // make sure none of the categories in the given FacetRequests was indexed with NO_PARENTS - for (FacetRequest fr : fsp.facetRequests) { - CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath); - if (clp.getOrdinalPolicy(fr.categoryPath.components[0]) == OrdinalPolicy.NO_PARENTS) { - return "this collector does not support aggregating categories that were indexed with OrdinalPolicy.NO_PARENTS"; - } - } - return null; - } - - /** - * Create a collector for accumulating facets while collecting documents - * during search. - * - * @param facetSearchParams - * faceted search parameters defining which facets are required and - * how. - * @param indexReader - * searched index. - * @param taxonomyReader - * taxonomy containing the facets. - */ - public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - assert assertParams(facetSearchParams) == null : assertParams(facetSearchParams); - facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader); - resultsGuard = new Object(); - } - - /** - * Create a {@link ScoredDocIdCollector} to be used as the first phase of - * the facet collection. If all facetRequests are do not require the - * document score, a ScoredDocIdCollector which does not store the document - * scores would be returned. Otherwise a SDIC which does store the documents - * will be returned, having an initial allocated space for 1000 such - * documents' scores. - */ - protected ScoredDocIdCollector initScoredDocCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader) { - boolean scoresNeeded = false; - for (FacetRequest frq : facetSearchParams.facetRequests) { - if (frq.requireDocumentScore()) { - scoresNeeded = true; - break; - } - } - return ScoredDocIdCollector.create(indexReader.maxDoc(), scoresNeeded); - } - - /** - * Create the {@link FacetsAccumulator} to be used. Default is - * {@link StandardFacetsAccumulator}. Called once at the constructor of the collector. - * - * @param facetSearchParams - * The search params. - * @param indexReader - * A reader to the index to search in. - * @param taxonomyReader - * A reader to the active taxonomy. - * @return The {@link FacetsAccumulator} to use. 
- */ - protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader) { - return new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - } - - @Override - public List getFacetResults() throws IOException { - synchronized (resultsGuard) { // over protection - if (results == null) { - // lazy creation but just once - results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs()); - scoreDocIdCollector = null; - } - return results; - } - } - - @Override - public boolean acceptsDocsOutOfOrder() { - return false; - } - - @Override - public void collect(int doc) throws IOException { - scoreDocIdCollector.collect(doc); - } - - @Override - public void setNextReader(AtomicReaderContext context) throws IOException { - scoreDocIdCollector.setNextReader(context); - } - - @Override - public void setScorer(Scorer scorer) throws IOException { - scoreDocIdCollector.setScorer(scorer); - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java (working copy) @@ -0,0 +1,78 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which updates the weight of a category by summing the + * scores of documents it was found in. 
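A possible wiring of the SumScoreFacetsAggregator defined just below into a search; this sketch assumes getAggregator() on FacetsAccumulator can be overridden, which the patch does not show explicitly, and reuses the setup variables from the earlier sketches:

    FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) {
      @Override
      public FacetsAggregator getAggregator() {
        // assumption: getAggregator() is public or protected and overridable
        return new SumScoreFacetsAggregator();
      }
    };
    // requiresDocScores() returns true, so create() picks the collector that keeps scores.
    FacetsCollector fc = FacetsCollector.create(fa);
    searcher.search(query, fc);
    List<FacetResult> weighted = fc.getFacetResults();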
+ */ +public class SumScoreFacetsAggregator implements FacetsAggregator { + + private final IntsRef ordinals = new IntsRef(32); + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + CategoryListIterator cli = clp.createCategoryListIterator(0); + if (!cli.setNextReader(matchingDocs.context)) { + return; + } + + int doc = 0; + int length = matchingDocs.bits.length(); + float[] scores = facetArrays.getFloatArray(); + while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + cli.getOrdinals(doc, ordinals); + int upto = ordinals.offset + ordinals.length; + for (int i = ordinals.offset; i < upto; i++) { + scores[ordinals.ints[i]] += matchingDocs.scores[doc]; + } + ++doc; + } + } + + private float rollupScores(int ordinal, int[] children, int[] siblings, float[] scores) { + float score = 0f; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + float childScore = scores[ordinal]; + childScore += rollupScores(children[ordinal], children, siblings, scores); + scores[ordinal] = childScore; + score += childScore; + ordinal = siblings[ordinal]; + } + return score; + } + + @Override + public void rollupValues(int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + float[] scores = facetArrays.getFloatArray(); + scores[ordinal] += rollupScores(children[ordinal], children, siblings, scores); + } + + @Override + public boolean requiresDocScores() { + return true; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (working copy) @@ -3,10 +3,11 @@ import java.io.IOException; import java.util.ArrayList; +import org.apache.lucene.facet.partitions.search.IntermediateFacetResult; +import org.apache.lucene.facet.partitions.search.PartitionsFacetResultsHandler; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.facet.util.ResultSortUtils; @@ -28,30 +29,29 @@ * limitations under the License. */ -/** - * Generate Top-K results for a particular FacetRequest. - *

- * K is global (among all results) and is defined by {@link FacetRequest#getNumResults()}. - *

- * Note: Values of 0 (Zero) are ignored by this results handler. +/** + * Generate Top-K results for a particular {@link FacetRequest}. K is global + * (among all results) and is defined by {@link FacetRequest#numResults}. * * @lucene.experimental */ -public class TopKFacetResultsHandler extends FacetResultsHandler { +public class TopKFacetResultsHandler extends PartitionsFacetResultsHandler { /** - * Construct top-K results handler. - * @param taxonomyReader taxonomy reader - * @param facetRequest facet request being served + * Construct top-K results handler. + * + * @param taxonomyReader + * taxonomy reader + * @param facetRequest + * facet request being served */ - public TopKFacetResultsHandler(TaxonomyReader taxonomyReader, - FacetRequest facetRequest) { - super(taxonomyReader, facetRequest); + public TopKFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); } // fetch top K for specific partition. @Override - public IntermediateFacetResult fetchPartitionResult(FacetArrays facetArrays, int offset) + public IntermediateFacetResult fetchPartitionResult(int offset) throws IOException { TopKFacetResult res = null; int ordinal = taxonomyReader.getOrdinal(facetRequest.categoryPath); @@ -65,7 +65,7 @@ FacetResultNode parentResultNode = new FacetResultNode(ordinal, value); Heap heap = ResultSortUtils.createSuitableHeap(facetRequest); - int totalFacets = heapDescendants(ordinal, heap, parentResultNode, facetArrays, offset); + int totalFacets = heapDescendants(ordinal, heap, parentResultNode, offset); res = new TopKFacetResult(facetRequest, parentResultNode, totalFacets); res.setHeap(heap); } @@ -113,7 +113,7 @@ * @return total number of descendants considered here by pq, excluding ordinal itself. */ private int heapDescendants(int ordinal, Heap pq, FacetResultNode parentResultNode, - FacetArrays facetArrays, int offset) throws IOException { + int offset) throws IOException { int partitionSize = facetArrays.arrayLength; int endOffset = offset + partitionSize; ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays(); Index: lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (working copy) @@ -4,11 +4,12 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.partitions.search.IntermediateFacetResult; +import org.apache.lucene.facet.partitions.search.PartitionsFacetResultsHandler; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.SortOrder; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.search.results.IntermediateFacetResult; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays; import org.apache.lucene.util.PriorityQueue; @@ -33,20 +34,19 @@ */ /** - * Generates {@link FacetResult} from the count arrays aggregated for a + * Generates {@link FacetResult} from the {@link FacetArrays} aggregated for a * particular {@link FacetRequest}. The generated {@link FacetResult} is a * subtree of the taxonomy tree. 
Its root node, * {@link FacetResult#getFacetResultNode()}, is the facet specified by * {@link FacetRequest#categoryPath}, and the enumerated children, - * {@link FacetResultNode#subResults}, of each node in that - * {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) - * among its children in the taxonomy. Top in the sense - * {@link FacetRequest#getSortBy()}, which can be by the values aggregated in - * the count arrays, or by ordinal numbers; also specified is the sort order, - * {@link FacetRequest#getSortOrder()}, ascending or descending, of these values - * or ordinals before their top K are selected. The depth (number of levels - * excluding the root) of the {@link FacetResult} tree is specified by - * {@link FacetRequest#getDepth()}. + * {@link FacetResultNode#subResults}, of each node in that {@link FacetResult} + * are the top K ( = {@link FacetRequest#numResults}) among its children in the + * taxonomy. Top in the sense {@link FacetRequest#getSortBy()}, which can be by + * the values aggregated in the count arrays, or by ordinal numbers; also + * specified is the sort order, {@link FacetRequest#getSortOrder()}, ascending + * or descending, of these values or ordinals before their top K are selected. + * The depth (number of levels excluding the root) of the {@link FacetResult} + * tree is specified by {@link FacetRequest#getDepth()}. *

* Because the number of selected children of each node is restricted, and not * the overall number of nodes in the {@link FacetResult}, facets not selected @@ -67,45 +67,45 @@ * * @lucene.experimental */ -public class TopKInEachNodeHandler extends FacetResultsHandler { +public class TopKInEachNodeHandler extends PartitionsFacetResultsHandler { - public TopKInEachNodeHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest) { - super(taxonomyReader, facetRequest); + public TopKInEachNodeHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) { + super(taxonomyReader, facetRequest, facetArrays); } /** * Recursively explore all facets that can be potentially included in the * {@link FacetResult} to be generated, and that belong to the given * partition, so that values can be examined and collected. For each such - * node, gather its top K ({@link FacetRequest#getNumResults()}) children - * among its children that are encountered in the given particular partition - * (aka current counting list). + * node, gather its top K ({@link FacetRequest#numResults}) children among its + * children that are encountered in the given particular partition (aka + * current counting list). + * @param offset + * the first (smallest) ordinal in the current partition; the count + * arrays cover the ordinals from offset (inclusive) to + * offset + the length of the count arrays (exclusive) * * @return {@link IntermediateFacetResult} consisting of - * {@link IntToObjectMap} that maps potential - * {@link FacetResult} nodes to their top K children encountered in - * the current partition. Note that the mapped potential tree nodes - * need not belong to the given partition, only the top K children - * mapped to. The aim is to identify nodes that are certainly excluded - * from the {@link FacetResult} to be eventually (after going through - * all the partitions) returned by this handler, because they have K - * better siblings, already identified in this partition. For the - * identified excluded nodes, we only count number of their - * descendants in the subtree (to be included in + * {@link IntToObjectMap} that maps potential {@link FacetResult} + * nodes to their top K children encountered in the current partition. + * Note that the mapped potential tree nodes need not belong to the + * given partition, only the top K children mapped to. The aim is to + * identify nodes that are certainly excluded from the + * {@link FacetResult} to be eventually (after going through all the + * partitions) returned by this handler, because they have K better + * siblings, already identified in this partition. For the identified + * excluded nodes, we only count number of their descendants in the + * subtree (to be included in * {@link FacetResult#getNumValidDescendants()}), but not bother with * selecting top K in these generations, which, by definition, are, * too, excluded from the FacetResult tree. - * @param arrays the already filled in count array, potentially only covering - * one partition: the ordinals ranging from - * @param offset to offset + the length of the count arrays - * within arrays (exclusive) - * @throws IOException in case - * {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)} - * does. - * @see FacetResultsHandler#fetchPartitionResult(FacetArrays, int) + * @throws IOException + * in case + * {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)} + * does. 
+ * @see #fetchPartitionResult(int) */ @Override - public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException { + public IntermediateFacetResult fetchPartitionResult(int offset) throws IOException { // get the root of the result tree to be returned, and the depth of that result tree // (depth means number of node levels excluding the root). @@ -114,12 +114,12 @@ return null; } - int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node + int K = Math.min(facetRequest.numResults,taxonomyReader.getSize()); // number of best results in each node // this will grow into the returned IntermediateFacetResult IntToObjectMap AACOsOfOnePartition = new IntToObjectMap(); - int partitionSize = arrays.arrayLength; // all partitions, except, possibly, the last, + int partitionSize = facetArrays.arrayLength; // all partitions, except, possibly, the last, // have the same length. Hence modulo is OK. int depth = facetRequest.getDepth(); @@ -128,9 +128,9 @@ // Need to only have root node. IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash( facetRequest, AACOsOfOnePartition); - if (isSelfPartition(rootNode, arrays, offset)) { + if (isSelfPartition(rootNode, facetArrays, offset)) { tempFRWH.isRootNodeIncluded = true; - tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize); + tempFRWH.rootNodeValue = this.facetRequest.getValueOf(facetArrays, rootNode % partitionSize); } return tempFRWH; } @@ -274,7 +274,7 @@ while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact // that TaxonomyReader.INVALID_ORDINAL == -1 < offset - double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize); + double value = facetRequest.getValueOf(facetArrays, tosOrdinal % partitionSize); if (value != 0) { // the value of yc is not 0, it is to be considered. totalNumOfDescendantsConsidered++; @@ -291,7 +291,7 @@ totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap // and now return it and all its descendants. These will never make it to FacetResult totalNumOfDescendantsConsidered += countOnly (ac.ordinal, children, - siblings, arrays, partitionSize, offset, endOffset, localDepth, depth); + siblings, partitionSize, offset, endOffset, localDepth, depth); reusables[++tosReuslables] = ac; } } @@ -343,9 +343,9 @@ // now generate a TempFacetResult from AACOsOfOnePartition, and consider self. IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash( facetRequest, AACOsOfOnePartition); - if (isSelfPartition(rootNode, arrays, offset)) { + if (isSelfPartition(rootNode, facetArrays, offset)) { tempFRWH.isRootNodeIncluded = true; - tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize); + tempFRWH.rootNodeValue = this.facetRequest.getValueOf(facetArrays, rootNode % partitionSize); } tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered; return tempFRWH; @@ -367,24 +367,21 @@ * @param youngestChild mapping a given ordinal to its youngest child in the taxonomy (of largest ordinal number), * or to -1 if has no children. 
* @param olderSibling mapping a given ordinal to its older sibling, or to -1 - * @param arrays values for the ordinals in the given partition + * @param partitionSize number of ordinals in the given partition * @param offset the first (smallest) ordinal in the given partition - * @param partitionSize number of ordinals in the given partition * @param endOffset one larger than the largest ordinal that belong to this partition * @param currentDepth the depth or ordinal in the TaxonomyTree (relative to rootnode of the facetRequest) * @param maxDepth maximal depth of descendants to be considered here (measured relative to rootnode of the * facetRequest). - * * @return the number of nodes, from ordinal down its descendants, of depth <= maxDepth, * which reside in the current partition, and whose value != 0 */ - private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling, - FacetArrays arrays, int partitionSize, int offset, - int endOffset, int currentDepth, int maxDepth) { + private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling, int partitionSize, int offset, + int endOffset, int currentDepth, int maxDepth) { int ret = 0; if (offset <= ordinal) { // ordinal belongs to the current partition - if (0 != facetRequest.getValueOf(arrays, ordinal % partitionSize)) { + if (0 != facetRequest.getValueOf(facetArrays, ordinal % partitionSize)) { ret++; } } @@ -398,8 +395,8 @@ yc = olderSibling[yc]; } while (yc > TaxonomyReader.INVALID_ORDINAL) { // assuming this is -1, smaller than any legal ordinal - ret += countOnly (yc, youngestChild, olderSibling, arrays, - partitionSize, offset, endOffset, currentDepth+1, maxDepth); + ret += countOnly (yc, youngestChild, olderSibling, partitionSize, + offset, endOffset, currentDepth+1, maxDepth); yc = olderSibling[yc]; } return ret; @@ -409,11 +406,10 @@ * Merge several partitions' {@link IntermediateFacetResult}-s into one of the * same format * - * @see FacetResultsHandler#mergeResults(IntermediateFacetResult...) + * @see #mergeResults(IntermediateFacetResult...) */ @Override - public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) - throws ClassCastException, IllegalArgumentException { + public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) { if (tmpResults.length == 0) { return null; @@ -428,7 +424,7 @@ } // i points to the first non-null input - int K = this.facetRequest.getNumResults(); // number of best result in each node + int K = this.facetRequest.numResults; // number of best result in each node IntermediateFacetResultWithHash tmpToReturn = (IntermediateFacetResultWithHash)tmpResults[i++]; // now loop over the rest of tmpResults and merge each into tmpToReturn @@ -667,7 +663,7 @@ * Enumerated in ordinals are siblings, * potential nodes of the {@link FacetResult} tree * (i.e., the descendants of the root node, no deeper than the specified depth). - * No more than K ( = {@link FacetRequest#getNumResults()}) + * No more than K ( = {@link FacetRequest#numResults}) * siblings are enumerated. * @lucene.internal */ @@ -681,11 +677,6 @@ } @Override - /** - * Recursively label the first facetRequest.getNumLabel() sub results - * of the root of a given {@link FacetResult}, or of an already labeled node in it. - * I.e., a node is labeled only if it is the root or all its ancestors are labeled. - */ public void labelResult(FacetResult facetResult) throws IOException { if (facetResult == null) { return; // any result to label? 
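For illustration, the PER_NODE_IN_TREE tree shape this handler produces can be consumed with a small recursive walk. The following sketch is not part of the patch; PrintFacetTree is a hypothetical helper that relies only on the public FacetResultNode fields used throughout this diff (label, value, subResults):

    import org.apache.lucene.facet.search.results.FacetResultNode;

    public class PrintFacetTree { // hypothetical helper, for illustration only
      /** Prints every node of a facet result tree, indented by its depth. */
      public static void print(FacetResultNode node, int depth) {
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
          indent.append("  ");
        }
        // label may be null for nodes beyond FacetRequest#getNumLabel()
        System.out.println(indent + (node.label == null ? "<unlabeled>" : node.label.toString()) + " = " + node.value);
        for (FacetResultNode child : node.subResults) {
          print(child, depth + 1); // each node carries at most K = numResults children
        }
      }
    }

Usage would be print(facetResult.getFacetResultNode(), 0) on a result whose request used ResultMode.PER_NODE_IN_TREE.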
@@ -700,10 +691,9 @@ } node.label = taxonomyReader.getPath(node.ordinal); - // label the first numToLabel of these children, and recursively -- their children. + // recursively label the first numToLabel children of every node int numLabeled = 0; for (FacetResultNode frn : node.subResults) { - // go over the children of node from first to last, no more than numToLable of them recursivelyLabel(frn, numToLabel); if (++numLabeled >= numToLabel) { return; @@ -719,7 +709,7 @@ // so now we test and re-order if necessary. public FacetResult rearrangeFacetResult(FacetResult facetResult) { PriorityQueue nodesHeap = - new ResultNodeHeap(this.facetRequest.getNumResults(), this.getSuitableACComparator()); + new ResultNodeHeap(this.facetRequest.numResults, this.getSuitableACComparator()); FacetResultNode topFrn = facetResult.getFacetResultNode(); rearrangeChilrenOfNode(topFrn, nodesHeap); return facetResult; Index: lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java (working copy) @@ -156,7 +156,7 @@ final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize]; FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ); //createAllListsSearchParams(facetIndexingParams, this.totalCounts); - FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) { + StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) { @Override protected HashMap getCategoryListMap( FacetArrays facetArrays, int partition) throws IOException { @@ -169,8 +169,8 @@ return map; } }; - fe.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); - fe.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader)); + sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT); + sfa.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader)); return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed); } Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java (working copy) @@ -31,15 +31,6 @@ */ public class CountFacetRequest extends FacetRequest { - /** - * Create a count facet request for a given node in the taxonomy. - * - * @param path category path of the category of interest. - * @param num number of child categories for which count info is requeted. - * reqiested. Default implementation will find top categories, - - * this behavior can be overridden by overriding - * {@link #createFacetResultsHandler(TaxonomyReader)}. 
- */ public CountFacetRequest(CategoryPath path, int num) { super(path, num); } @@ -60,12 +51,8 @@ } @Override - public boolean supportsComplements() { - return true; + public FacetArraysSource getFacetArraysSource() { + return FacetArraysSource.INT; } - @Override - public boolean requireDocumentScore() { - return false; - } } Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java (working copy) @@ -4,8 +4,6 @@ import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetResultsHandler; -import org.apache.lucene.facet.search.TopKFacetResultsHandler; -import org.apache.lucene.facet.search.TopKInEachNodeHandler; import org.apache.lucene.facet.search.aggregator.Aggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -40,28 +38,67 @@ * * @lucene.experimental */ -public abstract class FacetRequest implements Cloneable { +public abstract class FacetRequest { /** + * Defines how the request's limits, such as + * {@link FacetRequest#getNumLabel()} and {@link FacetRequest#numResults}, + * are applied to the result structure. Only relevant when + * {@link FacetRequest#getDepth()} is > 1. + */ + public enum ResultMode { + /** Limits are applied per node, and the result has a full tree structure. */ + PER_NODE_IN_TREE, + + /** Limits are applied globally, on total number of results, and the result has a flat structure. */ + GLOBAL_FLAT + } + + /** + * Specifies which array of {@link FacetArrays} should be used to resolve + * values. When set to {@link #INT} or {@link #FLOAT}, allows creating an + * optimized {@link FacetResultsHandler}, which does not call + * {@link FacetRequest#getValueOf(FacetArrays, int)} for every ordinal.
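+ * <p>
+ * For example (an illustrative sketch, not part of this patch; the class name
+ * is hypothetical and imports from this package are assumed), a request that
+ * resolves its values from the int arrays might look like:
+ * <pre>
+ * public class MyIntCountRequest extends FacetRequest {
+ *   public MyIntCountRequest(CategoryPath path, int num) { super(path, num); }
+ *   public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
+ *     return new CountingAggregator(arrays.getIntArray()); // aggregates into the int arrays (complements handling omitted)
+ *   }
+ *   public double getValueOf(FacetArrays arrays, int idx) {
+ *     return arrays.getIntArray()[idx]; // reads back from the same arrays
+ *   }
+ *   public FacetArraysSource getFacetArraysSource() {
+ *     return FacetArraysSource.INT; // lets an optimized handler skip getValueOf
+ *   }
+ * }
+ * </pre>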

+ * If set to {@link #BOTH}, the {@link FacetResultsHandler} will use + * {@link FacetRequest#getValueOf(FacetArrays, int)} to resolve ordinal + * values, although it is recommended that you consider writing a specialized + * {@link FacetResultsHandler}. + */ + public enum FacetArraysSource { INT, FLOAT, BOTH } + + /** Sort options for facet results. */ + public enum SortBy { + /** sort by category ordinal with the taxonomy */ + ORDINAL, + + /** sort by computed category value */ + VALUE + } + + /** Requested sort order for the results. */ + public enum SortOrder { ASCENDING, DESCENDING } + + /** * Default depth for facets accumulation. * @see #getDepth() */ public static final int DEFAULT_DEPTH = 1; - + /** * Default sort mode. * @see #getSortBy() */ public static final SortBy DEFAULT_SORT_BY = SortBy.VALUE; - + /** * Default result mode * @see #getResultMode() */ public static final ResultMode DEFAULT_RESULT_MODE = ResultMode.PER_NODE_IN_TREE; - + public final CategoryPath categoryPath; - private final int numResults; + public final int numResults; + private int numLabel; private int depth; private SortOrder sortOrder; @@ -72,7 +109,7 @@ * {@link CategoryPath} and numResults */ private final int hashCode; - + private ResultMode resultMode = DEFAULT_RESULT_MODE; /** @@ -107,40 +144,55 @@ hashCode = categoryPath.hashCode() ^ this.numResults; } + /** + * Create an aggregator for this facet request. Aggregator action depends on + * request definition. For a count request, it will usually increment the + * count for that facet. + * + * @param useComplements + * whether the complements optimization is being used for current + * computation. + * @param arrays + * provider for facet arrays in use for current computation. + * @param taxonomy + * reader of taxonomy in effect. + * @throws IOException If there is a low-level I/O error. + */ + public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) + throws IOException; + @Override - public FacetRequest clone() throws CloneNotSupportedException { - // Overridden to make it public - return (FacetRequest)super.clone(); + public boolean equals(Object o) { + if (o instanceof FacetRequest) { + FacetRequest that = (FacetRequest)o; + return that.hashCode == this.hashCode && + that.categoryPath.equals(this.categoryPath) && + that.numResults == this.numResults && + that.depth == this.depth && + that.resultMode == this.resultMode && + that.numLabel == this.numLabel; + } + return false; } - - public void setNumLabel(int numLabel) { - this.numLabel = numLabel; - } - public void setDepth(int depth) { - this.depth = depth; - } - - public void setSortOrder(SortOrder sortOrder) { - this.sortOrder = sortOrder; - } - - public void setSortBy(SortBy sortBy) { - this.sortBy = sortBy; - } - /** * How deeply to look under the given category. If the depth is 0, * only the category itself is counted. If the depth is 1, its immediate * children are also counted, and so on. If the depth is Integer.MAX_VALUE, * all the category's descendants are counted.
- * TODO (Facet): add AUTO_EXPAND option */ public final int getDepth() { + // TODO add AUTO_EXPAND option return depth; } - + /** + * Returns the {@link FacetArraysSource} this {@link FacetRequest} uses in + * {@link #getValueOf(FacetArrays, int)}. + */ + public abstract FacetArraysSource getFacetArraysSource(); + + /** * If getNumLabel() < getNumResults(), only the first getNumLabel() results * will have their category paths calculated, and the rest will only be * available as ordinals (category numbers) and will have null paths. @@ -167,140 +219,22 @@ return numLabel; } - /** - * The number of sub-categories to return (at most). If the sub-categories are - * returned. - *

- * If Integer.MAX_VALUE is specified, all sub-categories are returned. - *

- * Depending on the {@link #getResultMode() LimitsMode}, this limit is applied - * globally or per results node. In the global mode, if this limit is 3, only - * 3 top results would be computed. In the per-node mode, if this limit is 3, - * 3 top children of {@link #categoryPath the target category} would be - * returned, as well as 3 top children of each of them, and so forth, until - * the depth defined by {@link #getDepth()}. - * - * @see #getResultMode() - */ - public final int getNumResults() { - return numResults; + /** Return the requested result mode. */ + public final ResultMode getResultMode() { + return resultMode; } - /** - * Sort options for facet results. - */ - public enum SortBy { - /** sort by category ordinal with the taxonomy */ - ORDINAL, - - /** sort by computed category value */ - VALUE - } - /** Specify how should results be sorted. */ public final SortBy getSortBy() { return sortBy; } - /** Requested sort order for the results. */ - public enum SortOrder { ASCENDING, DESCENDING } - /** Return the requested order of results. */ public final SortOrder getSortOrder() { return sortOrder; } - @Override - public String toString() { - return categoryPath.toString()+" nRes="+numResults+" nLbl="+numLabel; - } - /** - * Creates a new {@link FacetResultsHandler} that matches the request logic - * and current settings, such as {@link #getDepth() depth}, - * {@link #getResultMode() limits-mode}, etc, as well as the passed in - * {@link TaxonomyReader}. - * - * @param taxonomyReader taxonomy reader is needed e.g. for knowing the - * taxonomy size. - */ - public FacetResultsHandler createFacetResultsHandler(TaxonomyReader taxonomyReader) { - try { - if (resultMode == ResultMode.PER_NODE_IN_TREE) { - return new TopKInEachNodeHandler(taxonomyReader, clone()); - } - return new TopKFacetResultsHandler(taxonomyReader, clone()); - } catch (CloneNotSupportedException e) { - // Shouldn't happen since we implement Cloneable. If it does happen, it is - // probably because the class was changed to not implement Cloneable - // anymore. - throw new RuntimeException(e); - } - } - - /** - * Result structure manner of applying request's limits such as - * {@link #getNumLabel()} and - * {@link #getNumResults()}. - */ - public enum ResultMode { - /** Limits are applied per node, and the result has a full tree structure. */ - PER_NODE_IN_TREE, - - /** Limits are applied globally, on total number of results, and the result has a flat structure. */ - GLOBAL_FLAT - } - - /** Return the requested result mode. */ - public final ResultMode getResultMode() { - return resultMode; - } - - /** - * @param resultMode the resultMode to set - * @see #getResultMode() - */ - public void setResultMode(ResultMode resultMode) { - this.resultMode = resultMode; - } - - @Override - public int hashCode() { - return hashCode; - } - - @Override - public boolean equals(Object o) { - if (o instanceof FacetRequest) { - FacetRequest that = (FacetRequest)o; - return that.hashCode == this.hashCode && - that.categoryPath.equals(this.categoryPath) && - that.numResults == this.numResults && - that.depth == this.depth && - that.resultMode == this.resultMode && - that.numLabel == this.numLabel; - } - return false; - } - - /** - * Create an aggregator for this facet request. Aggregator action depends on - * request definition. For a count request, it will usually increment the - * count for that facet. - * - * @param useComplements - * whether the complements optimization is being used for current - * computation. 
- * @param arrays - * provider for facet arrays in use for current computation. - * @param taxonomy - * reader of taxonomy in effect. - * @throws IOException If there is a low-level I/O error. - */ - public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) - throws IOException; - - /** * Return the value of a category used for facets computations for this * request. For a count request this would be the count for that facet, i.e. * an integer number. but for other requests this can be the result of a more @@ -319,16 +253,44 @@ * getValueOf would be invoked with idx * being n % partitionSize. */ + // TODO perhaps instead of getValueOf we can have a postProcess(FacetArrays) + // That, together with getFacetArraysSource should allow ResultHandlers to + // efficiently obtain the values from the arrays directly public abstract double getValueOf(FacetArrays arrays, int idx); + + @Override + public int hashCode() { + return hashCode; + } + + public void setDepth(int depth) { + this.depth = depth; + } + + public void setNumLabel(int numLabel) { + this.numLabel = numLabel; + } /** - * Indicates whether this facet request is eligible for applying the complements optimization. + * @param resultMode the resultMode to set + * @see #getResultMode() */ - public boolean supportsComplements() { - return false; // by default: no + public void setResultMode(ResultMode resultMode) { + this.resultMode = resultMode; } - /** Indicates whether the results of this request depends on each result document's score */ - public abstract boolean requireDocumentScore(); + public void setSortBy(SortBy sortBy) { + this.sortBy = sortBy; + } + public void setSortOrder(SortOrder sortOrder) { + this.sortOrder = sortOrder; + } + + @Override + public String toString() { + return categoryPath.toString()+" nRes="+numResults+" nLbl="+numLabel; + } + } + \ No newline at end of file Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetSearchParams.java (working copy) @@ -77,20 +77,21 @@ this.facetRequests = facetRequests; this.indexingParams = indexingParams; } - + @Override public String toString() { - final char TAB = '\t'; + final String INDENT = " "; final char NEWLINE = '\n'; StringBuilder sb = new StringBuilder("IndexingParams: "); - sb.append(NEWLINE).append(TAB).append(indexingParams); + sb.append(NEWLINE).append(INDENT).append(indexingParams); sb.append(NEWLINE).append("FacetRequests:"); for (FacetRequest facetRequest : facetRequests) { - sb.append(NEWLINE).append(TAB).append(facetRequest); + sb.append(NEWLINE).append(INDENT).append(facetRequest); } return sb.toString(); } + } Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java (working copy) @@ -1,59 +0,0 @@ -package org.apache.lucene.facet.search.params; - -import org.apache.lucene.facet.search.FacetArrays; -import org.apache.lucene.facet.search.aggregator.Aggregator; -import org.apache.lucene.facet.search.aggregator.ScoringAggregator; -import 
org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Facet request for weighting facets according to document scores. - * - * @lucene.experimental - */ -public class ScoreFacetRequest extends FacetRequest { - - /** Create a score facet request for a given node in the taxonomy. */ - public ScoreFacetRequest(CategoryPath path, int num) { - super(path, num); - } - - @Override - public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { - assert !useComplements : "complements are not supported by this FacetRequest"; - return new ScoringAggregator(arrays.getFloatArray()); - } - - @Override - public double getValueOf(FacetArrays arrays, int ordinal) { - return arrays.getFloatArray()[ordinal]; - } - - @Override - public boolean supportsComplements() { - return false; - } - - @Override - public boolean requireDocumentScore() { - return true; - } - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/SumScoreFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/SumScoreFacetRequest.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/SumScoreFacetRequest.java (working copy) @@ -0,0 +1,55 @@ +package org.apache.lucene.facet.search.params; + +import org.apache.lucene.facet.search.FacetArrays; +import org.apache.lucene.facet.search.aggregator.Aggregator; +import org.apache.lucene.facet.search.aggregator.ScoringAggregator; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetRequest} for weighting facets by summing the scores of matching + * documents. 
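+ * <p>
+ * A usage sketch (illustrative; assumes open readers and a searcher, and uses
+ * {@link StandardFacetsAccumulator} since {@link ScoringAggregator} is an
+ * old-style {@link Aggregator}):
+ * <pre>
+ * FacetSearchParams fsp = new FacetSearchParams(
+ *     new SumScoreFacetRequest(new CategoryPath("root", "a"), 10));
+ * StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
+ * FacetsCollector fc = FacetsCollector.create(sfa);
+ * searcher.search(query, fc);
+ * List<FacetResult> results = fc.getFacetResults();
+ * </pre>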
+ * + * @lucene.experimental + */ +public class SumScoreFacetRequest extends FacetRequest { + + /** Create a score facet request for a given node in the taxonomy. */ + public SumScoreFacetRequest(CategoryPath path, int num) { + super(path, num); + } + + @Override + public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { + assert !useComplements : "complements are not supported by this FacetRequest"; + return new ScoringAggregator(arrays.getFloatArray()); + } + + @Override + public double getValueOf(FacetArrays arrays, int ordinal) { + return arrays.getFloatArray()[ordinal]; + } + + @Override + public FacetArraysSource getFacetArraysSource() { + return FacetArraysSource.FLOAT; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/params/SumScoreFacetRequest.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java (working copy) @@ -58,13 +58,8 @@ } @Override - public boolean supportsComplements() { - return false; + public FacetArraysSource getFacetArraysSource() { + return FacetArraysSource.FLOAT; } - @Override - public boolean requireDocumentScore() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java (working copy) @@ -54,18 +54,13 @@ } @Override + public FacetArraysSource getFacetArraysSource() { + return FacetArraysSource.INT; + } + + @Override public double getValueOf(FacetArrays arrays, int ordinal) { return arrays.getIntArray()[ordinal]; } - @Override - public boolean supportsComplements() { - return false; - } - - @Override - public boolean requireDocumentScore() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResult.java (working copy) @@ -46,11 +46,10 @@ } /** - * Number of descendants of {@link #getFacetResultNode() root facet result node}, - * up till the requested depth, which are valid by the - * {@link FacetRequest#createFacetResultsHandler(org.apache.lucene.facet.taxonomy.TaxonomyReader) - * results handler in effect}. Typically -- have value != 0. - * This number does not include the root node. + * Number of descendants of {@link #getFacetResultNode() root facet result + * node}, up till the requested depth. Typically -- have value != 0. This + * number does not include the root node. 
+ * * @see #getFacetRequest() * @see FacetRequest#getDepth() */ Index: lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java (working copy) @@ -47,7 +47,7 @@ *

* NOTE: by default, all nodes are labeled. Only when * {@link FacetRequest#getNumLabel()} < - * {@link FacetRequest#getNumResults()} there will be unlabeled nodes. + * {@link FacetRequest#numResults} there will be unlabeled nodes. */ public CategoryPath label; Index: lucene/facet/src/java/org/apache/lucene/facet/search/results/IntermediateFacetResult.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/results/IntermediateFacetResult.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/results/IntermediateFacetResult.java (working copy) @@ -1,41 +0,0 @@ -package org.apache.lucene.facet.search.results; - -import org.apache.lucene.facet.search.FacetResultsHandler; -import org.apache.lucene.facet.search.params.FacetRequest; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Intermediate {@link FacetResult} of faceted search. - *

- * This is an empty interface on purpose. - *

- * It allows {@link FacetResultsHandler} to return intermediate result objects - * that only it knows how to interpret, and so the handler has maximal freedom - * in defining what an intermediate result is, depending on its specific logic. - * - * @lucene.experimental - */ -public interface IntermediateFacetResult { - - /** - * Facet request for which this temporary result was created. - */ - FacetRequest getFacetRequest(); - -} Index: lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (working copy) @@ -168,7 +168,7 @@ FacetRequest origFrq = sampledFreq.orig; FacetResultNode trimmedRootNode = facetResult.getFacetResultNode(); - trimSubResults(trimmedRootNode, origFrq.getNumResults()); + trimSubResults(trimmedRootNode, origFrq.numResults); return new FacetResult(origFrq, trimmedRootNode, facetResult.getNumValidDescendants()); } @@ -199,7 +199,7 @@ if (overSampleFactor > 1) { // any factoring to do? List facetRequests = new ArrayList(); for (FacetRequest frq : original.facetRequests) { - int overSampledNumResults = (int) Math.ceil(frq.getNumResults() * overSampleFactor); + int overSampledNumResults = (int) Math.ceil(frq.numResults * overSampleFactor); facetRequests.add(new OverSampledFacetRequest(frq, overSampledNumResults)); } res = new FacetSearchParams(facetRequests, original.indexingParams); @@ -233,19 +233,14 @@ } @Override + public FacetArraysSource getFacetArraysSource() { + return orig.getFacetArraysSource(); + } + + @Override public double getValueOf(FacetArrays arrays, int idx) { return orig.getValueOf(arrays, idx); } - - @Override - public boolean requireDocumentScore() { - return orig.requireDocumentScore(); - } - - @Override - public boolean supportsComplements() { - return orig.supportsComplements(); - } } } Index: lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java (working copy) @@ -4,8 +4,8 @@ import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.partitions.search.PartitionsFacetResultsHandler; import org.apache.lucene.facet.search.FacetArrays; -import org.apache.lucene.facet.search.FacetResultsHandler; import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.SamplingWrapper; import org.apache.lucene.facet.search.ScoredDocIDs; @@ -42,7 +42,8 @@ * directly extends {@link StandardFacetsAccumulator}. *

 *   <li>This class can effectively apply sampling on the complement set of * matching document, thereby working efficiently with the complement - *   optimization - see {@link FacetsAccumulator#getComplementThreshold()}. + *   optimization - see {@link StandardFacetsAccumulator#getComplementThreshold()}. * *
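+ * <p>
+ * A typical setup might look like this (an illustrative sketch, not part of
+ * this patch; the no-arg {@link RandomSampler} constructor, the constructor
+ * argument order, and open readers are assumptions):
+ * <pre>
+ * Sampler sampler = new RandomSampler();
+ * SamplingAccumulator acc = new SamplingAccumulator(sampler, fsp, indexReader, taxoReader);
+ * FacetsCollector fc = FacetsCollector.create(acc);
+ * searcher.search(query, fc);
+ * List<FacetResult> results = fc.getFacetResults();
+ * </pre>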

    * Note: Sampling accumulation (Accumulation over a sampled-set of the results), @@ -77,36 +78,26 @@ @Override public List accumulate(ScoredDocIDs docids) throws IOException { - // first let delegee accumulate without labeling at all (though - // currently it doesn't matter because we have to label all returned anyhow) - boolean origAllowLabeling = isAllowLabeling(); - setAllowLabeling(false); - // Replacing the original searchParams with the over-sampled FacetSearchParams original = searchParams; searchParams = sampler.overSampledSearchParams(original); List sampleRes = super.accumulate(docids); - setAllowLabeling(origAllowLabeling); List fixedRes = new ArrayList(); for (FacetResult fres : sampleRes) { // for sure fres is not null because this is guaranteed by the delegee. - FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler( - taxonomyReader); + PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest()); // fix the result of current request - sampler.getSampleFixer(indexReader, taxonomyReader, searchParams) - .fixResult(docids, fres); + sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres); - fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any + fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to // Using the sampler to trim the extra (over-sampled) results fres = sampler.trimResult(fres); - // arranging it needs to + // final labeling if allowed (because labeling is a costly operation) - if (isAllowLabeling()) { - frh.labelResult(fres); - } + frh.labelResult(fres); fixedRes.add(fres); // add to final results } Index: lucene/facet/src/java/org/apache/lucene/facet/util/ResultSortUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/util/ResultSortUtils.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/util/ResultSortUtils.java (working copy) @@ -41,7 +41,7 @@ * @throws IllegalArgumentException is provided facet request is not supported */ public static Heap createSuitableHeap(FacetRequest facetRequest) { - int nresults = facetRequest.getNumResults(); + int nresults = facetRequest.numResults; boolean accending = (facetRequest.getSortOrder() == SortOrder.ASCENDING); if (nresults == Integer.MAX_VALUE) { Index: lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java (revision 1443446) +++ lucene/facet/src/java/org/apache/lucene/facet/util/TaxonomyMergeUtils.java (working copy) @@ -6,17 +6,13 @@ import org.apache.lucene.facet.index.OrdinalMappingAtomicReader; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.store.Directory; 
-import org.apache.lucene.util.Version; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -42,41 +38,7 @@ public class TaxonomyMergeUtils { /** - * Merges the given taxonomy and index directories. Note that this method - * opens {@link DirectoryTaxonomyWriter} and {@link IndexWriter} on the - * respective destination indexes. Therefore if you have a writer open on any - * of them, it should be closed, or you should use - * {@link #merge(Directory, Directory, IndexWriter, DirectoryTaxonomyWriter, FacetIndexingParams)} - * instead. - * - * @see #merge(Directory, Directory, IndexWriter, DirectoryTaxonomyWriter, FacetIndexingParams) - */ - public static void merge(Directory srcIndexDir, Directory srcTaxDir, Directory destIndexDir, Directory destTaxDir, - FacetIndexingParams params) throws IOException { - IndexWriter destIndexWriter = new IndexWriter(destIndexDir, new IndexWriterConfig(Version.LUCENE_42, null)); - DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir); - merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter, params); - destTaxWriter.close(); - destIndexWriter.close(); - } - - /** * Merges the given taxonomy and index directories and commits the changes to - * the given writers. This method uses {@link MemoryOrdinalMap} to store the - * mapped ordinals. If you cannot afford the memory, you can use - * {@link #merge(Directory, Directory, DirectoryTaxonomyWriter.OrdinalMap, IndexWriter, DirectoryTaxonomyWriter, FacetIndexingParams)} - * by passing {@link DiskOrdinalMap}. - * - * @see #merge(Directory, Directory, DirectoryTaxonomyWriter.OrdinalMap, - * IndexWriter, DirectoryTaxonomyWriter, FacetIndexingParams) - */ - public static void merge(Directory srcIndexDir, Directory srcTaxDir, IndexWriter destIndexWriter, - DirectoryTaxonomyWriter destTaxWriter, FacetIndexingParams params) throws IOException { - merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter, params); - } - - /** - * Merges the given taxonomy and index directories and commits the changes to * the given writers. 
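+ * <p>
+ * Callers open and close the destination writers themselves. An illustrative
+ * sketch, mirroring this patch's OrdinalMappingReaderTest ('conf' is an
+ * {@link IndexWriterConfig} and 'params' a {@link FacetIndexingParams}
+ * assumed to exist):
+ * <pre>
+ * IndexWriter destIndexWriter = new IndexWriter(destIndexDir, conf);
+ * DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir);
+ * try {
+ *   TaxonomyMergeUtils.merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(),
+ *       destIndexWriter, destTaxWriter, params);
+ * } finally {
+ *   IOUtils.close(destIndexWriter, destTaxWriter);
+ * }
+ * </pre>
+ * A {@link DiskOrdinalMap} may be passed instead of {@link MemoryOrdinalMap}
+ * when the mapping does not fit in memory.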
*/ public static void merge(Directory srcIndexDir, Directory srcTaxDir, OrdinalMap map, IndexWriter destIndexWriter, Index: lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java (working copy) @@ -329,7 +329,7 @@ if (VERBOSE) { System.out.println(freq.categoryPath.toString()+ "\t\t" + topResNode); } - assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults()); + assertCountsAndCardinality(facetCountsTruth, topResNode, freq.numResults); } } Index: lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/index/OrdinalMappingReaderTest.java (working copy) @@ -17,13 +17,16 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap; import org.apache.lucene.facet.util.TaxonomyMergeUtils; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.junit.Test; /* @@ -58,7 +61,13 @@ Directory taxDir1 = newDirectory(); buildIndexWithFacets(dir1, taxDir1, false, fip); - TaxonomyMergeUtils.merge(dir, taxDir, dir1, taxDir1, fip); + IndexWriter destIndexWriter = new IndexWriter(dir1, new IndexWriterConfig(TEST_VERSION_CURRENT, null)); + DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(taxDir1); + try { + TaxonomyMergeUtils.merge(dir, taxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter, fip); + } finally { + IOUtils.close(destIndexWriter, destTaxWriter); + } verifyResults(dir1, taxDir1, fip); dir1.close(); Index: lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java (working copy) @@ -29,7 +29,7 @@ public class AdaptiveAccumulatorTest extends BaseSampleTestTopK { @Override - protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams) { AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader); res.setSampler(sampler); Index: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (revision 0) +++ 
lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (working copy) @@ -0,0 +1,390 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.facet.FacetTestCase; +import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; +import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.collections.ObjectToIntMap; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class CountingFacetsAggregatorTest extends FacetTestCase { + + private static final Term A = new Term("f", "a"); + private static final CategoryPath CP_A = new CategoryPath("A"), CP_B = new CategoryPath("B"); + private static final CategoryPath CP_C = new CategoryPath("C"), CP_D = new CategoryPath("D"); // indexed w/ NO_PARENTS + private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3; + private static final int NUM_CHILDREN_CP_C = 5, NUM_CHILDREN_CP_D = 5; + private static final CategoryPath[] CATEGORIES_A, CATEGORIES_B; + private static final CategoryPath[] CATEGORIES_C, CATEGORIES_D; + static { + CATEGORIES_A = new CategoryPath[NUM_CHILDREN_CP_A]; + for (int i = 0; i < NUM_CHILDREN_CP_A; i++) { + CATEGORIES_A[i] = new CategoryPath(CP_A.components[0], Integer.toString(i)); + } + CATEGORIES_B = new CategoryPath[NUM_CHILDREN_CP_B]; + for (int i = 0; i < NUM_CHILDREN_CP_B; i++) { + CATEGORIES_B[i] = new CategoryPath(CP_B.components[0], Integer.toString(i)); + } + + // NO_PARENTS categories + CATEGORIES_C = new CategoryPath[NUM_CHILDREN_CP_C]; + for (int i = 0; i < NUM_CHILDREN_CP_C; i++) { + CATEGORIES_C[i] = new CategoryPath(CP_C.components[0], Integer.toString(i)); + } + + // Multi-level categories + CATEGORIES_D = new CategoryPath[NUM_CHILDREN_CP_D]; + for (int i = 0; i < NUM_CHILDREN_CP_D; i++) { + String val = Integer.toString(i); + CATEGORIES_D[i] = new CategoryPath(CP_D.components[0], val, val + val); // e.g. D/1/11, D/2/22... + } + } + + private static Directory indexDir, taxoDir; + private static ObjectToIntMap allExpectedCounts, termExpectedCounts; + private static FacetIndexingParams fip; + + @AfterClass + public static void afterClassCountingFacetsAggregatorTest() throws Exception { + IOUtils.close(indexDir, taxoDir); + } + + private static List randomCategories(Random random) { + // add random categories from the two dimensions, ensuring that the same + // category is not added twice. 
+ int numFacetsA = random.nextInt(3) + 1; // 1-3 + int numFacetsB = random.nextInt(2) + 1; // 1-2 + ArrayList categories_a = new ArrayList(); + categories_a.addAll(Arrays.asList(CATEGORIES_A)); + ArrayList categories_b = new ArrayList(); + categories_b.addAll(Arrays.asList(CATEGORIES_B)); + Collections.shuffle(categories_a, random); + Collections.shuffle(categories_b, random); + + ArrayList categories = new ArrayList(); + categories.addAll(categories_a.subList(0, numFacetsA)); + categories.addAll(categories_b.subList(0, numFacetsB)); + + // add the NO_PARENT categories + categories.add(CATEGORIES_C[random().nextInt(NUM_CHILDREN_CP_C)]); + categories.add(CATEGORIES_D[random().nextInt(NUM_CHILDREN_CP_D)]); + + return categories; + } + + private static void addField(Document doc) { + doc.add(new StringField(A.field(), A.text(), Store.NO)); + } + + private static void addFacets(Document doc, FacetFields facetFields, boolean updateTermExpectedCounts) + throws IOException { + List docCategories = randomCategories(random()); + for (CategoryPath cp : docCategories) { + if (cp.components[0].equals(CP_D.components[0])) { + cp = cp.subpath(2); // we'll get counts for the 2nd level only + } + allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1); + if (updateTermExpectedCounts) { + termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1); + } + } + // add 1 to each NO_PARENTS dimension + allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1); + allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1); + allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1); + if (updateTermExpectedCounts) { + termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1); + termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1); + termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1); + } + + facetFields.addFields(doc, docCategories); + } + + private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException { + int numDocs = atLeast(2); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + addField(doc); + indexWriter.addDocument(doc); + } + indexWriter.commit(); // flush a segment + } + + private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, + ObjectToIntMap expectedCounts) throws IOException { + Random random = random(); + int numDocs = atLeast(random, 2); + FacetFields facetFields = new FacetFields(taxoWriter, fip); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + addFacets(doc, facetFields, false); + indexWriter.addDocument(doc); + } + indexWriter.commit(); // flush a segment + } + + private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, + ObjectToIntMap expectedCounts) throws IOException { + Random random = random(); + int numDocs = atLeast(random, 2); + FacetFields facetFields = new FacetFields(taxoWriter, fip); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + addFacets(doc, facetFields, true); + addField(doc); + indexWriter.addDocument(doc); + } + indexWriter.commit(); // flush a segment + } + + private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, + ObjectToIntMap expectedCounts) throws IOException { + Random random = random(); + int numDocs = atLeast(random, 2); + FacetFields facetFields = new FacetFields(taxoWriter, fip); + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + boolean hasContent = random.nextBoolean(); + if 
(hasContent) {
+        addField(doc);
+      }
+      addFacets(doc, facetFields, hasContent);
+      indexWriter.addDocument(doc);
+    }
+    indexWriter.commit(); // flush a segment
+  }
+
+  // initialize expectedCounts w/ 0 for all categories
+  private static ObjectToIntMap<CategoryPath> newCounts() {
+    ObjectToIntMap<CategoryPath> counts = new ObjectToIntMap<CategoryPath>();
+    counts.put(CP_A, 0);
+    counts.put(CP_B, 0);
+    counts.put(CP_C, 0);
+    counts.put(CP_D, 0);
+    for (CategoryPath cp : CATEGORIES_A) {
+      counts.put(cp, 0);
+    }
+    for (CategoryPath cp : CATEGORIES_B) {
+      counts.put(cp, 0);
+    }
+    for (CategoryPath cp : CATEGORIES_C) {
+      counts.put(cp, 0);
+    }
+    for (CategoryPath cp : CATEGORIES_D) {
+      counts.put(cp.subpath(2), 0);
+    }
+    return counts;
+  }
+
+  @BeforeClass
+  public static void beforeClassCountingFacetsAggregatorTest() throws Exception {
+    indexDir = newDirectory();
+    taxoDir = newDirectory();
+
+    // create an index which has:
+    // 1. Segment with no categories, but matching results
+    // 2. Segment w/ categories, but no results
+    // 3. Segment w/ categories and results
+    // 4. Segment w/ categories, but only some results
+
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
+    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
+    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+
+    Map<String,OrdinalPolicy> policies = new HashMap<String,OrdinalPolicy>();
+    policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS);
+    policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS);
+    policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS);
+    CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);
+    fip = new FacetIndexingParams(clp);
+
+    allExpectedCounts = newCounts();
+    termExpectedCounts = newCounts();
+
+    // segment w/ no categories
+    indexDocsNoFacets(indexWriter);
+
+    // segment w/ categories, no content
+    indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts);
+
+    // segment w/ categories and content
+    indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts);
+
+    // segment w/ categories and some content
+    indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);
+
+    IOUtils.close(indexWriter, taxoWriter);
+  }
+
+  private FacetsAccumulator randomAccumulator(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
+    final FacetsAggregator aggregator = random().nextBoolean() ?
+        new CountingFacetsAggregator() : new FastCountingFacetsAggregator();
+    return new FacetsAccumulator(fsp, indexReader, taxoReader) {
+      @Override
+      public FacetsAggregator getAggregator() {
+        return aggregator;
+      }
+    };
+  }
+
+  @Test
+  public void testDifferentNumResults() throws Exception {
+    // test the collector w/ FacetRequests and different numResults
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
+        new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
+    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));
+    TermQuery q = new TermQuery(A);
+    searcher.search(q, fc);
+
+    List<FacetResult> facetResults = fc.getFacetResults();
+    assertEquals("invalid number of facet results", 2, facetResults.size());
+    for (FacetResult res : facetResults) {
+      FacetResultNode root = res.getFacetResultNode();
+      assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
+      for (FacetResultNode child : root.subResults) {
+        assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
+      }
+    }
+
+    IOUtils.close(indexReader, taxoReader);
+  }
+
+  @Test
+  public void testAllCounts() throws Exception {
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A),
+        new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B));
+    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));
+    searcher.search(new MatchAllDocsQuery(), fc);
+
+    List<FacetResult> facetResults = fc.getFacetResults();
+    assertEquals("invalid number of facet results", 2, facetResults.size());
+    for (FacetResult res : facetResults) {
+      FacetResultNode root = res.getFacetResultNode();
+      assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
+      int prevValue = Integer.MAX_VALUE;
+      int prevOrdinal = Integer.MAX_VALUE;
+      for (FacetResultNode child : root.subResults) {
+        assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
+        assertTrue("wrong sort order of sub results: child.value=" + child.value + " prevValue=" + prevValue, child.value <= prevValue);
+        if (child.value == prevValue) {
+          assertTrue("wrong sort order of sub results", child.ordinal < prevOrdinal);
+        }
+        prevValue = (int) child.value;
+        prevOrdinal = child.ordinal;
+      }
+    }
+
+    IOUtils.close(indexReader, taxoReader);
+  }
+
+  @Test
+  public void testBigNumResults() throws Exception {
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, Integer.MAX_VALUE),
+        new CountFacetRequest(CP_B, Integer.MAX_VALUE));
+    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));
+    searcher.search(new MatchAllDocsQuery(), fc);
+
+    List<FacetResult> facetResults = fc.getFacetResults();
+    assertEquals("invalid number of facet results", 2, facetResults.size());
+    for (FacetResult res : facetResults) {
+      FacetResultNode root = res.getFacetResultNode();
+      assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
+      for (FacetResultNode child : root.subResults) {
+        assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
+      }
+    }
+
+    IOUtils.close(indexReader, taxoReader);
+  }
+
+  @Test
+  public void testNoParents() throws Exception {
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+    FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_C, NUM_CHILDREN_CP_C),
+        new CountFacetRequest(CP_D, NUM_CHILDREN_CP_D));
+    FacetsCollector fc = FacetsCollector.create(randomAccumulator(fsp, indexReader, taxoReader));
+    searcher.search(new MatchAllDocsQuery(), fc);
+
+    List<FacetResult> facetResults = fc.getFacetResults();
+    assertEquals("invalid number of facet results", fsp.facetRequests.size(), facetResults.size());
+    for (FacetResult res : facetResults) {
+      FacetResultNode root = res.getFacetResultNode();
+      assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
+      for (FacetResultNode child : root.subResults) {
+        assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
+      }
+    }
+
+    IOUtils.close(indexReader, taxoReader);
+  }
+
+}

Property changes on: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property

Index: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java	(working copy)
@@ -1,475 +0,0 @@
-package org.apache.lucene.facet.search;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.facet.FacetTestCase;
-import org.apache.lucene.facet.index.FacetFields;
-import org.apache.lucene.facet.index.params.CategoryListParams;
-import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
-import org.apache.lucene.facet.index.params.FacetIndexingParams;
-import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
-import org.apache.lucene.facet.search.params.CountFacetRequest;
-import org.apache.lucene.facet.search.params.FacetRequest;
-import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
-import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
-import org.apache.lucene.facet.search.params.FacetSearchParams;
-import org.apache.lucene.facet.search.params.ScoreFacetRequest;
-import org.apache.lucene.facet.search.results.FacetResult;
-import org.apache.lucene.facet.search.results.FacetResultNode;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import 
org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.TaxonomyWriter; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.NoMergePolicy; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.collections.ObjectToIntMap; -import org.apache.lucene.util.encoding.IntEncoder; -import org.apache.lucene.util.encoding.VInt8IntEncoder; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -public class CountingFacetsCollectorTest extends FacetTestCase { - - private static final Term A = new Term("f", "a"); - private static final CategoryPath CP_A = new CategoryPath("A"), CP_B = new CategoryPath("B"); - private static final CategoryPath CP_C = new CategoryPath("C"), CP_D = new CategoryPath("D"); // indexed w/ NO_PARENTS - private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3; - private static final int NUM_CHILDREN_CP_C = 5, NUM_CHILDREN_CP_D = 5; - private static final CategoryPath[] CATEGORIES_A, CATEGORIES_B; - private static final CategoryPath[] CATEGORIES_C, CATEGORIES_D; - static { - CATEGORIES_A = new CategoryPath[NUM_CHILDREN_CP_A]; - for (int i = 0; i < NUM_CHILDREN_CP_A; i++) { - CATEGORIES_A[i] = new CategoryPath(CP_A.components[0], Integer.toString(i)); - } - CATEGORIES_B = new CategoryPath[NUM_CHILDREN_CP_B]; - for (int i = 0; i < NUM_CHILDREN_CP_B; i++) { - CATEGORIES_B[i] = new CategoryPath(CP_B.components[0], Integer.toString(i)); - } - - // NO_PARENTS categories - CATEGORIES_C = new CategoryPath[NUM_CHILDREN_CP_C]; - for (int i = 0; i < NUM_CHILDREN_CP_C; i++) { - CATEGORIES_C[i] = new CategoryPath(CP_C.components[0], Integer.toString(i)); - } - - // Multi-level categories - CATEGORIES_D = new CategoryPath[NUM_CHILDREN_CP_D]; - for (int i = 0; i < NUM_CHILDREN_CP_D; i++) { - String val = Integer.toString(i); - CATEGORIES_D[i] = new CategoryPath(CP_D.components[0], val, val + val); // e.g. D/1/11, D/2/22... 
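// Editor's note: the multi-level categories above are indexed under
// per-dimension ordinal policies; the replacement test earlier in this
// patch configures them the same way. A minimal illustrative sketch (the
// "B"/"C" dimension names are placeholders, the classes are the patch's own):
Map<String,OrdinalPolicy> policies = new HashMap<String,OrdinalPolicy>();
policies.put("B", OrdinalPolicy.ALL_PARENTS); // parent ordinals stored per document
policies.put("C", OrdinalPolicy.NO_PARENTS);  // leaf ordinals only; parents are rolled up at search time
CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);
FacetIndexingParams fip = new FacetIndexingParams(clp);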
- } - } - - private static Directory indexDir, taxoDir; - private static ObjectToIntMap allExpectedCounts, termExpectedCounts; - private static FacetIndexingParams fip; - - @AfterClass - public static void afterClassCountingFacetsCollectorTest() throws Exception { - IOUtils.close(indexDir, taxoDir); - } - - private static List randomCategories(Random random) { - // add random categories from the two dimensions, ensuring that the same - // category is not added twice. - int numFacetsA = random.nextInt(3) + 1; // 1-3 - int numFacetsB = random.nextInt(2) + 1; // 1-2 - ArrayList categories_a = new ArrayList(); - categories_a.addAll(Arrays.asList(CATEGORIES_A)); - ArrayList categories_b = new ArrayList(); - categories_b.addAll(Arrays.asList(CATEGORIES_B)); - Collections.shuffle(categories_a, random); - Collections.shuffle(categories_b, random); - - ArrayList categories = new ArrayList(); - categories.addAll(categories_a.subList(0, numFacetsA)); - categories.addAll(categories_b.subList(0, numFacetsB)); - - // add the NO_PARENT categories - categories.add(CATEGORIES_C[random().nextInt(NUM_CHILDREN_CP_C)]); - categories.add(CATEGORIES_D[random().nextInt(NUM_CHILDREN_CP_D)]); - - return categories; - } - - private static void addField(Document doc) { - doc.add(new StringField(A.field(), A.text(), Store.NO)); - } - - private static void addFacets(Document doc, FacetFields facetFields, boolean updateTermExpectedCounts) - throws IOException { - List docCategories = randomCategories(random()); - for (CategoryPath cp : docCategories) { - if (cp.components[0].equals(CP_D.components[0])) { - cp = cp.subpath(2); // we'll get counts for the 2nd level only - } - allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1); - if (updateTermExpectedCounts) { - termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1); - } - } - // add 1 to each NO_PARENTS dimension - allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1); - allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1); - allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1); - if (updateTermExpectedCounts) { - termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1); - termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1); - termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1); - } - - facetFields.addFields(doc, docCategories); - } - - private static void indexDocsNoFacets(IndexWriter indexWriter) throws IOException { - int numDocs = atLeast(2); - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - addField(doc); - indexWriter.addDocument(doc); - } - indexWriter.commit(); // flush a segment - } - - private static void indexDocsWithFacetsNoTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, - ObjectToIntMap expectedCounts) throws IOException { - Random random = random(); - int numDocs = atLeast(random, 2); - FacetFields facetFields = new FacetFields(taxoWriter, fip); - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - addFacets(doc, facetFields, false); - indexWriter.addDocument(doc); - } - indexWriter.commit(); // flush a segment - } - - private static void indexDocsWithFacetsAndTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, - ObjectToIntMap expectedCounts) throws IOException { - Random random = random(); - int numDocs = atLeast(random, 2); - FacetFields facetFields = new FacetFields(taxoWriter, fip); - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - addFacets(doc, facetFields, true); - addField(doc); - 
indexWriter.addDocument(doc); - } - indexWriter.commit(); // flush a segment - } - - private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, - ObjectToIntMap expectedCounts) throws IOException { - Random random = random(); - int numDocs = atLeast(random, 2); - FacetFields facetFields = new FacetFields(taxoWriter, fip); - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - boolean hasContent = random.nextBoolean(); - if (hasContent) { - addField(doc); - } - addFacets(doc, facetFields, hasContent); - indexWriter.addDocument(doc); - } - indexWriter.commit(); // flush a segment - } - - // initialize expectedCounts w/ 0 for all categories - private static ObjectToIntMap newCounts() { - ObjectToIntMap counts = new ObjectToIntMap(); - counts.put(CP_A, 0); - counts.put(CP_B, 0); - counts.put(CP_C, 0); - counts.put(CP_D, 0); - for (CategoryPath cp : CATEGORIES_A) { - counts.put(cp, 0); - } - for (CategoryPath cp : CATEGORIES_B) { - counts.put(cp, 0); - } - for (CategoryPath cp : CATEGORIES_C) { - counts.put(cp, 0); - } - for (CategoryPath cp : CATEGORIES_D) { - counts.put(cp.subpath(2), 0); - } - return counts; - } - - @BeforeClass - public static void beforeClassCountingFacetsCollectorTest() throws Exception { - indexDir = newDirectory(); - taxoDir = newDirectory(); - - // create an index which has: - // 1. Segment with no categories, but matching results - // 2. Segment w/ categories, but no results - // 3. Segment w/ categories and results - // 4. Segment w/ categories, but only some results - - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); - conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments - IndexWriter indexWriter = new IndexWriter(indexDir, conf); - TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); - - Map policies = new HashMap(); - policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS); - policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS); - policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS); - CategoryListParams clp = new PerDimensionOrdinalPolicy(policies); - fip = new FacetIndexingParams(clp); - - allExpectedCounts = newCounts(); - termExpectedCounts = newCounts(); - - // segment w/ no categories - indexDocsNoFacets(indexWriter); - - // segment w/ categories, no content - indexDocsWithFacetsNoTerms(indexWriter, taxoWriter, allExpectedCounts); - - // segment w/ categories and content - indexDocsWithFacetsAndTerms(indexWriter, taxoWriter, allExpectedCounts); - - // segment w/ categories and some content - indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts); - - IOUtils.close(indexWriter, taxoWriter); - } - - @Test - public void testInvalidParams() throws Exception { - final CategoryPath dummyCP = new CategoryPath("a"); - final FacetRequest dummyFR = new CountFacetRequest(dummyCP, 10); - - // only CountFacetRequests are allowed - assertNotNull("only CountFacetRequests should be allowed", - CountingFacetsCollector.assertParams(new FacetSearchParams(new ScoreFacetRequest(dummyCP, 10)))); - - // only depth=1 - FacetRequest cfr = new CountFacetRequest(dummyCP, 10); - cfr.setDepth(2); - assertNotNull("only depth 1 should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr))); - - // only SortOrder.DESCENDING - cfr = new CountFacetRequest(dummyCP, 10); - cfr.setSortOrder(SortOrder.ASCENDING); - assertNotNull("only SortOrder.DESCENDING 
should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr))); - - // only SortBy.VALUE - cfr = new CountFacetRequest(dummyCP, 10); - cfr.setSortBy(SortBy.ORDINAL); - assertNotNull("only SortBy.VALUE should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr))); - - // no numToLabel - cfr = new CountFacetRequest(dummyCP, 10); - cfr.setNumLabel(2); - assertNotNull("numToLabel should not be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr))); - - FacetIndexingParams fip = new FacetIndexingParams() { - @Override - public CategoryListParams getCategoryListParams(CategoryPath category) { - return new CategoryListParams(); - } - }; - assertNotNull("only one CLP should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR, - new CountFacetRequest(new CategoryPath("moo"), 10)))); - - fip = new FacetIndexingParams(new CategoryListParams("moo")) { - final CategoryListParams clp = new CategoryListParams() { - @Override - public IntEncoder createEncoder() { - return new VInt8IntEncoder(); - } - }; - @Override - public List getAllCategoryListParams() { - return Collections.singletonList(clp); - } - - @Override - public CategoryListParams getCategoryListParams(CategoryPath category) { - return clp; - } - }; - assertNotNull("only DGapVIntEncoder should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR))); - - fip = new FacetIndexingParams(new CategoryListParams("moo")) { - @Override - public int getPartitionSize() { - return 2; - } - }; - assertNotNull("partitions should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR))); - } - - @Test - public void testDifferentNumResults() throws Exception { - // test the collector w/ FacetRequests and different numResults - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), - new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader); - TermQuery q = new TermQuery(A); - searcher.search(q, fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", 2, facetResults.size()); - for (FacetResult res : facetResults) { - FacetResultNode root = res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value); - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value); - } - } - - IOUtils.close(indexReader, taxoReader); - } - - @Test - public void testAllCounts() throws Exception { - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), - new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader); - searcher.search(new MatchAllDocsQuery(), fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", 2, facetResults.size()); - for (FacetResult res : facetResults) { - FacetResultNode root = 
res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - int prevValue = Integer.MAX_VALUE; - int prevOrdinal = Integer.MAX_VALUE; - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); - assertTrue("wrong sort order of sub results: child.value=" + child.value + " prevValue=" + prevValue, child.value <= prevValue); - if (child.value == prevValue) { - assertTrue("wrong sort order of sub results", child.ordinal < prevOrdinal); - } - prevValue = (int) child.value; - prevOrdinal = child.ordinal; - } - } - - IOUtils.close(indexReader, taxoReader); - } - - @Test - public void testBigNumResults() throws Exception { - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, Integer.MAX_VALUE), - new CountFacetRequest(CP_B, Integer.MAX_VALUE)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader); - searcher.search(new MatchAllDocsQuery(), fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", 2, facetResults.size()); - for (FacetResult res : facetResults) { - FacetResultNode root = res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); - } - } - - IOUtils.close(indexReader, taxoReader); - } - - @Test - public void testDirectSource() throws Exception { - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), - new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader, new FacetArrays(taxoReader.getSize()), true); - searcher.search(new MatchAllDocsQuery(), fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", 2, facetResults.size()); - for (FacetResult res : facetResults) { - FacetResultNode root = res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); - } - } - - IOUtils.close(indexReader, taxoReader); - } - - @Test - public void testNoParents() throws Exception { - DirectoryReader indexReader = DirectoryReader.open(indexDir); - TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); - IndexSearcher searcher = new IndexSearcher(indexReader); - FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_C, NUM_CHILDREN_CP_C), - new CountFacetRequest(CP_D, NUM_CHILDREN_CP_D)); - FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader); - searcher.search(new MatchAllDocsQuery(), fc); - - List facetResults = fc.getFacetResults(); - assertEquals("invalid number of facet results", fsp.facetRequests.size(), facetResults.size()); - for (FacetResult res : 
facetResults) { - FacetResultNode root = res.getFacetResultNode(); - assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value); - for (FacetResultNode child : root.subResults) { - assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value); - } - } - - IOUtils.close(indexReader, taxoReader); - } - -} Index: lucene/facet/src/test/org/apache/lucene/facet/search/SamplingWrapperTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/SamplingWrapperTest.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/SamplingWrapperTest.java (working copy) @@ -29,10 +29,9 @@ public class SamplingWrapperTest extends BaseSampleTestTopK { @Override - protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams) { - FacetsAccumulator fa = new StandardFacetsAccumulator(searchParams, indexReader, taxoReader); - return new SamplingWrapper(fa, sampler); + return new SamplingWrapper(new StandardFacetsAccumulator(searchParams, indexReader, taxoReader), sampler); } } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java (working copy) @@ -3,28 +3,22 @@ import java.io.IOException; import java.util.List; +import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.search.params.CountFacetRequest; +import org.apache.lucene.facet.search.params.FacetSearchParams; +import org.apache.lucene.facet.search.results.FacetResult; +import org.apache.lucene.facet.search.results.FacetResultNode; +import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.ParallelAtomicReader; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; import org.junit.After; import org.junit.Before; import org.junit.Test; -import org.apache.lucene.facet.FacetTestBase; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIdCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; -import org.apache.lucene.facet.search.params.CountFacetRequest; -import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.taxonomy.CategoryPath; - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with
@@ -104,17 +98,9 @@
   }
   
   private void doTestComplements() throws Exception {
-    Query q = new MatchAllDocsQuery(); //new TermQuery(new Term(TEXT,"white"));
-    if (VERBOSE) {
-      System.out.println("Query: "+q);
-    }
-    ScoredDocIdCollector dCollector =
-        ScoredDocIdCollector.create(indexReader.maxDoc(),false); // scoring is disabled
-    searcher.search(q, dCollector);
-    
     // verify by facet values
-    List<FacetResult> countResWithComplement = findFacets(dCollector.getScoredDocIDs(), true);
-    List<FacetResult> countResNoComplement = findFacets(dCollector.getScoredDocIDs(), false);
+    List<FacetResult> countResWithComplement = findFacets(true);
+    List<FacetResult> countResNoComplement = findFacets(false);
     
     assertEquals("Wrong number of facet count results with complement!",1,countResWithComplement.size());
     assertEquals("Wrong number of facet count results no complement!",1,countResNoComplement.size());
@@ -124,21 +110,18 @@
     assertEquals("Wrong number of top count aggregated categories with complement!",3,parentResWithComp.subResults.size());
     assertEquals("Wrong number of top count aggregated categories no complement!",3,parentResNoComp.subResults.size());
-    
   }
   
   /** compute facets with certain facet requests and docs */
-  private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException {
+  private List<FacetResult> findFacets(boolean withComplement) throws IOException {
     FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10));
-    FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
+    StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
+    sfa.setComplementThreshold(withComplement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT);
+    FacetsCollector fc = FacetsCollector.create(sfa);
+    searcher.search(new MatchAllDocsQuery(), fc);
     
-    fAccumulator.setComplementThreshold(
-        withComplement ?
-            FacetsAccumulator.FORCE_COMPLEMENT:
-            FacetsAccumulator.DISABLE_COMPLEMENT);
+    List<FacetResult> res = fc.getFacetResults();
     
-    List<FacetResult> res = fAccumulator.accumulate(sDocids);
-    
     // Results are ready, printing them...
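// Editor's note: the hunks above capture the complement migration in full.
// As a self-contained sketch of the new flow (identifiers are the patch's
// own; index/taxonomy setup omitted), the accumulator is configured up
// front and then wrapped by the collector:
StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
sfa.setComplementThreshold(StandardFacetsAccumulator.FORCE_COMPLEMENT); // or DISABLE_COMPLEMENT
FacetsCollector fc = FacetsCollector.create(sfa);
searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> res = fc.getFacetResults(); // accumulation is driven via the collector now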
int i = 0; for (FacetResult facetResult : res) { @@ -147,7 +130,7 @@ } } - assertEquals(withComplement, ((StandardFacetsAccumulator) fAccumulator).isUsingComplements); + assertEquals(withComplement, sfa.isUsingComplements); return res; } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (working copy) @@ -1,7 +1,11 @@ package org.apache.lucene.facet.search; +import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -9,8 +13,13 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.index.FacetFields; +import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.params.ScoreFacetRequest; +import org.apache.lucene.facet.search.params.SumScoreFacetRequest; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -18,7 +27,6 @@ import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; @@ -47,16 +55,15 @@ public class TestFacetsCollector extends FacetTestCase { @Test - public void testFacetsWithDocScore() throws Exception { + public void testSumScoreAggregator() throws Exception { Directory indexDir = newDirectory(); Directory taxoDir = newDirectory(); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir); - IndexWriter iw = new IndexWriter(indexDir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); FacetFields facetFields = new FacetFields(taxonomyWriter); - for(int i = atLeast(2000); i > 0; --i) { + for(int i = atLeast(30); i > 0; --i) { Document doc = new Document(); doc.add(new StringField("f", "v", Store.NO)); facetFields.addFields(doc, Collections.singletonList(new CategoryPath("a"))); @@ -66,12 +73,17 @@ taxonomyWriter.close(); iw.close(); - FacetSearchParams sParams = new FacetSearchParams(new ScoreFacetRequest(new CategoryPath("a"), 10)); - DirectoryReader r = DirectoryReader.open(indexDir); DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); - FacetsCollector fc = FacetsCollector.create(sParams, r, taxo); + FacetSearchParams sParams = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10)); + FacetsAccumulator fa = new FacetsAccumulator(sParams, r, taxo) { + 
@Override + public FacetsAggregator getAggregator() { + return new SumScoreFacetsAggregator(); + } + }; + FacetsCollector fc = FacetsCollector.create(fa); TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); new IndexSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs)); @@ -83,4 +95,119 @@ IOUtils.close(taxo, taxoDir, r, indexDir); } + @Test + public void testMultiCountingLists() throws Exception { + Directory indexDir = newDirectory(); + Directory taxoDir = newDirectory(); + + TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir); + IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + FacetIndexingParams fip = new PerDimensionIndexingParams(Collections.singletonMap(new CategoryPath("b"), new CategoryListParams("$b"))); + + FacetFields facetFields = new FacetFields(taxonomyWriter, fip); + for(int i = atLeast(30); i > 0; --i) { + Document doc = new Document(); + doc.add(new StringField("f", "v", Store.NO)); + List cats = new ArrayList(); + cats.add(new CategoryPath("a")); + cats.add(new CategoryPath("b")); + facetFields.addFields(doc, cats); + iw.addDocument(doc); + } + + taxonomyWriter.close(); + iw.close(); + + DirectoryReader r = DirectoryReader.open(indexDir); + DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); + + FacetSearchParams sParams = new FacetSearchParams(fip, + new CountFacetRequest(new CategoryPath("a"), 10), + new CountFacetRequest(new CategoryPath("b"), 10)); + FacetsCollector fc = FacetsCollector.create(sParams, r, taxo); + new IndexSearcher(r).search(new MatchAllDocsQuery(), fc); + + for (FacetResult res : fc.getFacetResults()) { + assertEquals("unexpected count for " + res, r.maxDoc(), (int) res.getFacetResultNode().value); + } + + IOUtils.close(taxo, taxoDir, r, indexDir); + } + + @Test + public void testCountAndSumScore() throws Exception { + Directory indexDir = newDirectory(); + Directory taxoDir = newDirectory(); + + TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxoDir); + IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))); + FacetIndexingParams fip = new PerDimensionIndexingParams(Collections.singletonMap(new CategoryPath("b"), new CategoryListParams("$b"))); + + FacetFields facetFields = new FacetFields(taxonomyWriter, fip); + for(int i = atLeast(30); i > 0; --i) { + Document doc = new Document(); + doc.add(new StringField("f", "v", Store.NO)); + List cats = new ArrayList(); + cats.add(new CategoryPath("a")); + cats.add(new CategoryPath("b")); + facetFields.addFields(doc, cats); + iw.addDocument(doc); + } + + taxonomyWriter.close(); + iw.close(); + + DirectoryReader r = DirectoryReader.open(indexDir); + DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); + + FacetSearchParams sParams = new FacetSearchParams(fip, + new CountFacetRequest(new CategoryPath("a"), 10), + new SumScoreFacetRequest(new CategoryPath("b"), 10)); + + final Map clpAggregator = new HashMap(); + clpAggregator.put(fip.getCategoryListParams(new CategoryPath("a")), new FastCountingFacetsAggregator()); + clpAggregator.put(fip.getCategoryListParams(new CategoryPath("b")), new SumScoreFacetsAggregator()); + FacetsAccumulator fa = new FacetsAccumulator(sParams, r, taxo) { + @Override + public FacetsAggregator getAggregator() { + return new FacetsAggregator() { + + @Override + public void rollupValues(int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) 
{ + throw new UnsupportedOperationException("not supported yet"); + } + + @Override + public boolean requiresDocScores() { + for (FacetsAggregator aggregator : clpAggregator.values()) { + if (aggregator.requiresDocScores()) { + return true; + } + } + return false; + } + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + clpAggregator.get(clp).aggregate(matchingDocs, clp, facetArrays); + } + }; + } + }; + + FacetsCollector fc = FacetsCollector.create(fa); + TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); + new IndexSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs)); + + List facetResults = fc.getFacetResults(); + FacetResult fresA = facetResults.get(0); + assertEquals("unexpected count for " + fresA, r.maxDoc(), (int) fresA.getFacetResultNode().value); + + FacetResult fresB = facetResults.get(1); + double expected = topDocs.topDocs().getMaxScore() * r.numDocs(); + assertEquals("unexpected value for " + fresB, expected, fresB.getFacetResultNode().value, 1E-10); + + IOUtils.close(taxo, taxoDir, r, indexDir); + } + } Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java (working copy) @@ -1,152 +0,0 @@ -package org.apache.lucene.facet.search; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -import org.apache.lucene.facet.FacetTestBase; -import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.search.params.CountFacetRequest; -import org.apache.lucene.facet.search.params.FacetSearchParams; -import org.apache.lucene.facet.search.params.ScoreFacetRequest; -import org.apache.lucene.facet.search.results.FacetResult; -import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.junit.Before; -import org.junit.Test; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Test ScoredDocIdCollector. 
*/ -public class TestScoredDocIdCollector extends FacetTestBase { - - private FacetIndexingParams fip; - - @Override - @Before - public void setUp() throws Exception { - super.setUp(); - fip = getFacetIndexingParams(Integer.MAX_VALUE); - initIndex(fip); - } - - @Override - public void tearDown() throws Exception { - closeAll(); - super.tearDown(); - } - - @Test - public void testConstantScore() throws Exception { - // test that constant score works well - - Query q = new TermQuery(new Term(CONTENT_FIELD, "white")); - if (VERBOSE) { - System.out.println("Query: " + q); - } - float constScore = 17.0f; - ScoredDocIdCollector dCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false); // scoring is disabled - dCollector.setDefaultScore(constScore); - searcher.search(q, dCollector); - - // verify by doc scores at the level of doc-id-iterator - ScoredDocIDs scoredDocIDs = dCollector.getScoredDocIDs(); - assertEquals("Wrong number of matching documents!", 2, scoredDocIDs.size()); - ScoredDocIDsIterator docItr = scoredDocIDs.iterator(); - while (docItr.next()) { - assertEquals("Wrong score for doc " + docItr.getDocID(), constScore, docItr.getScore(), Double.MIN_VALUE); - } - - // verify by facet values - CategoryPath cp = new CategoryPath("root","a"); - FacetSearchParams countFSP = new FacetSearchParams(fip, new CountFacetRequest(cp, 10)); - FacetSearchParams scoreFSP = new FacetSearchParams(fip, new ScoreFacetRequest(cp, 10)); - - List countRes = findFacets(scoredDocIDs, countFSP); - List scoreRes = findFacets(scoredDocIDs, scoreFSP); - - assertEquals("Wrong number of facet count results!", 1, countRes.size()); - assertEquals("Wrong number of facet score results!", 1, scoreRes.size()); - - FacetResultNode parentCountRes = countRes.get(0).getFacetResultNode(); - FacetResultNode parentScoreRes = scoreRes.get(0).getFacetResultNode(); - - assertEquals("Wrong number of top count aggregated categories!", 3, parentCountRes.subResults.size()); - assertEquals("Wrong number of top score aggregated categories!", 3, parentScoreRes.subResults.size()); - - // rely on that facet value is computed as doc-score, and - // accordingly compare values of the two top-category results. - - FacetResultNode[] countResNodes = resultNodesAsArray(parentCountRes); - FacetResultNode[] scoreResNodes = resultNodesAsArray(parentScoreRes); - - for (int i = 0; i < scoreResNodes.length; i++) { - assertEquals("Ordinals differ!", countResNodes[i].ordinal, scoreResNodes[i].ordinal); - assertEquals("Wrong scores!", constScore * countResNodes[i].value, scoreResNodes[i].value, Double.MIN_VALUE); - } - } - - // compute facets with certain facet requests and docs - private List findFacets(ScoredDocIDs sDocids, FacetSearchParams facetSearchParams) throws IOException { - FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader); - List res = fAccumulator.accumulate(sDocids); - - // Results are ready, printing them... 
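// Editor's note: the test deleted here exercised ScoredDocIdCollector,
// which this patch removes; FacetsCollector now records matching documents
// (and, when the aggregator requires them, scores) per segment as
// FacetsCollector.MatchingDocs. The equivalent post-patch flow, assuming
// the same fixture fields (fip, indexReader, taxoReader, searcher):
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root", "a"), 10));
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(new TermQuery(new Term(CONTENT_FIELD, "white")), fc);
List<FacetResult> res = fc.getFacetResults();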
- int i = 0; - for (FacetResult facetResult : res) { - if (VERBOSE) { - System.out.println("Res " + (i++) + ": " + facetResult); - } - } - - return res; - } - - @Test - public void testOutOfOrderCollectionScoringEnabled() throws Exception { - assertFalse( - "when scoring enabled, out-of-order collection should not be supported", - ScoredDocIdCollector.create(1, true).acceptsDocsOutOfOrder()); - } - - @Test - public void testOutOfOrderCollectionScoringDisabled() throws Exception { - // This used to fail, because ScoredDocIdCollector.acceptDocsOutOfOrder - // returned true, even when scoring was enabled. - final int[] docs = new int[] { 1, 0, 2 }; // out of order on purpose - - ScoredDocIdCollector sdic = ScoredDocIdCollector.create(docs.length, false); - assertTrue( - "when scoring disabled, out-of-order collection should be supported", - sdic.acceptsDocsOutOfOrder()); - for (int i = 0; i < docs.length; i++) { - sdic.collect(docs[i]); - } - - assertEquals("expected 3 documents but got " + sdic.getScoredDocIDs().size(), 3, sdic.getScoredDocIDs().size()); - ScoredDocIDsIterator iter = sdic.getScoredDocIDs().iterator(); - Arrays.sort(docs); - for (int i = 0; iter.next(); i++) { - assertEquals("expected doc " + docs[i], docs[i], iter.getDocID()); - } - } - -} Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java (working copy) @@ -101,14 +101,9 @@ // search for "f:a", only segments 1 and 3 should match results Query q = new TermQuery(new Term("f", "a")); - FacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) { - @Override - public boolean supportsComplements() { - return false; // disable complements - } - }; + FacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10); FacetSearchParams fsp = new FacetSearchParams(fip, countNoComplements); - FacetsCollector fc = new StandardFacetsCollector(fsp , indexReader, taxoReader); + FacetsCollector fc = FacetsCollector.create(fsp , indexReader, taxoReader); indexSearcher.search(q, fc); List results = fc.getFacetResults(); assertEquals("received too many facet results", 1, results.size()); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (working copy) @@ -109,48 +109,36 @@ // Get all of the documents and run the query, then do different // facet counts and compare to control Query q = new TermQuery(new Term("content", "alpha")); - ScoredDocIdCollector scoredDoc = ScoredDocIdCollector.create(ir.maxDoc(), true); - // Collector collector = new MultiCollector(scoredDoc); - is.search(q, scoredDoc); - - CountFacetRequest cfra23 = new CountFacetRequest( - new CategoryPath("a"), 2); + CountFacetRequest cfra23 = new CountFacetRequest(new CategoryPath("a"), 2); cfra23.setDepth(3); cfra23.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfra22 = new CountFacetRequest( - new CategoryPath("a"), 2); + CountFacetRequest cfra22 = new CountFacetRequest(new CategoryPath("a"), 2); 
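// Editor's note: every request in this test combines the same three knobs;
// a compact illustration (the values are examples only):
CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2); // top-2 categories
cfr.setDepth(3);                                // descend up to 3 levels below "a"
cfr.setResultMode(ResultMode.PER_NODE_IN_TREE); // keep top-K under every node, returned as a tree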
cfra22.setDepth(2); cfra22.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfra21 = new CountFacetRequest( - new CategoryPath("a"), 2); + CountFacetRequest cfra21 = new CountFacetRequest(new CategoryPath("a"), 2); cfra21.setDepth(1); cfra21.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfrb22 = new CountFacetRequest( - new CategoryPath("a", "b"), 2); + CountFacetRequest cfrb22 = new CountFacetRequest(new CategoryPath("a", "b"), 2); cfrb22.setDepth(2); cfrb22.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfrb23 = new CountFacetRequest( - new CategoryPath("a", "b"), 2); + CountFacetRequest cfrb23 = new CountFacetRequest(new CategoryPath("a", "b"), 2); cfrb23.setDepth(3); cfrb23.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfrb21 = new CountFacetRequest( - new CategoryPath("a", "b"), 2); + CountFacetRequest cfrb21 = new CountFacetRequest(new CategoryPath("a", "b"), 2); cfrb21.setDepth(1); cfrb21.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest doctor = new CountFacetRequest( - new CategoryPath("Doctor"), 2); + CountFacetRequest doctor = new CountFacetRequest(new CategoryPath("Doctor"), 2); doctor.setDepth(1); doctor.setResultMode(ResultMode.PER_NODE_IN_TREE); - CountFacetRequest cfrb20 = new CountFacetRequest( - new CategoryPath("a", "b"), 2); + CountFacetRequest cfrb20 = new CountFacetRequest(new CategoryPath("a", "b"), 2); cfrb20.setDepth(0); cfrb20.setResultMode(ResultMode.PER_NODE_IN_TREE); @@ -166,17 +154,13 @@ FacetSearchParams facetSearchParams = new FacetSearchParams(facetRequests, iParams); FacetArrays facetArrays = new FacetArrays(PartitionsUtils.partitionSize(facetSearchParams.indexingParams, tr)); - FacetsAccumulator fctExtrctr = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays); - fctExtrctr.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); - long start = System.currentTimeMillis(); + StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(facetSearchParams, is.getIndexReader(), tr, facetArrays); + sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT); + FacetsCollector fc = FacetsCollector.create(sfa); + + is.search(q, fc); + List facetResults = fc.getFacetResults(); - List facetResults = fctExtrctr.accumulate(scoredDoc.getScoredDocIDs()); - - long end = System.currentTimeMillis(); - if (VERBOSE) { - System.out.println("Time: " + (end - start)); - } - FacetResult fr = facetResults.get(0); // a, depth=3, K=2 boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(9, fr.getNumValidDescendants()); @@ -203,7 +187,7 @@ } // now rearrange double [] expectedValues00 = { 6.0, 1.0, 5.0, 3.0, 2.0 }; - fr = cfra23.createFacetResultsHandler(tr).rearrangeFacetResult(fr); + fr = sfa.createFacetResultsHandler(cfra23).rearrangeFacetResult(fr); i = 0; for (FacetResultNode node : parentRes.subResults) { assertEquals(expectedValues00[i++], node.value, Double.MIN_VALUE); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java (working copy) @@ -13,8 +13,6 @@ import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; import 
org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.MatchAllDocsQuery; import org.junit.Test; @@ -92,16 +90,8 @@ // do different facet counts and compare to control FacetSearchParams sParams = getFacetSearchParams(facetRequests, fip); + FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader); - FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { - @Override - protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); - return fa; - } - }; - searcher.search(new MatchAllDocsQuery(), fc); List facetResults = fc.getFacetResults(); @@ -169,15 +159,7 @@ // do different facet counts and compare to control CategoryPath path = new CategoryPath("a", "b"); FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE)); - - FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { - @Override - protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); - return fa; - } - }; + FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader); searcher.search(new MatchAllDocsQuery(), fc); List results = fc.getFacetResults(); @@ -187,17 +169,8 @@ assertEquals(path + " should only have 4 desendants", 4, res.getNumValidDescendants()); // As a control base results, ask for top-1000 results - FacetSearchParams sParams2 = getFacetSearchParams( - fip, new CountFacetRequest(path, Integer.MAX_VALUE)); - - FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) { - @Override - protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - FacetsAccumulator fa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - fa.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT); - return fa; - } - }; + FacetSearchParams sParams2 = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE)); + FacetsCollector fc2 = FacetsCollector.create(sParams2, indexReader, taxoReader); searcher.search(new MatchAllDocsQuery(), fc2); List baseResults = fc2.getFacetResults(); Index: lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java (working copy) @@ -8,8 +8,6 @@ import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.MatchAllDocsQuery; import 
org.apache.lucene.search.Query; import org.junit.Test; @@ -37,17 +35,9 @@ throws IOException { Query q = new MatchAllDocsQuery(); FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip); - FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) { - @Override - protected FacetsAccumulator initFacetsAccumulator( - FacetSearchParams facetSearchParams, IndexReader indexReader, - TaxonomyReader taxonomyReader) { - FacetsAccumulator accumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); - double complement = doComplement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT; - accumulator.setComplementThreshold(complement); - return accumulator; - } - }; + StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader); + sfa.setComplementThreshold(doComplement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT); + FacetsCollector fc = FacetsCollector.create(sfa); searcher.search(q, fc); List facetResults = fc.getFacetResults(); return facetResults; Index: lucene/facet/src/test/org/apache/lucene/facet/search/associations/AssociationsFacetRequestTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/associations/AssociationsFacetRequestTest.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/associations/AssociationsFacetRequestTest.java (working copy) @@ -25,7 +25,6 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -119,8 +118,8 @@ assertNotNull("No results!",res); assertEquals("Wrong number of results!",2, res.size()); - assertEquals("Wrong count for category 'a'!",200, (int) res.get(0).getFacetResultNode().value); - assertEquals("Wrong count for category 'b'!",150, (int) res.get(1).getFacetResultNode().value); + assertEquals("Wrong count for category 'a'!", 200, (int) res.get(0).getFacetResultNode().value); + assertEquals("Wrong count for category 'b'!", 150, (int) res.get(1).getFacetResultNode().value); taxo.close(); } Index: lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/params/FacetRequestTest.java (working copy) @@ -1,13 +1,7 @@ package org.apache.lucene.facet.search.params; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.search.FacetResultsHandler; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; import org.junit.Test; /* @@ -49,47 +43,4 @@ assertFalse("equals() should return false as fr1.depth != fr2.depth", fr1.equals(fr2)); } - @Test - public void testGetFacetResultHandlerDifferentTaxonomy() throws Exception { - FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10); - Directory dir1 = 
newDirectory(); - Directory dir2 = newDirectory(); - // create empty indexes, so that LTR ctor won't complain about a missing index. - new IndexWriter(dir1, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); - new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); - TaxonomyReader tr1 = new DirectoryTaxonomyReader(dir1); - TaxonomyReader tr2 = new DirectoryTaxonomyReader(dir2); - FacetResultsHandler frh1 = fr.createFacetResultsHandler(tr1); - FacetResultsHandler frh2 = fr.createFacetResultsHandler(tr2); - assertTrue("should not return the same FacetResultHandler instance for different TaxonomyReader instances", frh1 != frh2); - tr1.close(); - tr2.close(); - dir1.close(); - dir2.close(); - } - - @Test - public void testImmutability() throws Exception { - // Tests that after a FRH is created by FR, changes to FR are not reflected - // in the FRH. - FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10); - Directory dir = newDirectory(); - // create empty indexes, so that LTR ctor won't complain about a missing index. - new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close(); - TaxonomyReader tr = new DirectoryTaxonomyReader(dir); - FacetResultsHandler frh = fr.createFacetResultsHandler(tr); - fr.setDepth(10); - assertEquals(FacetRequest.DEFAULT_DEPTH, frh.getFacetRequest().getDepth()); - tr.close(); - dir.close(); - } - - @Test - public void testClone() throws Exception { - FacetRequest fr = new CountFacetRequest(new CategoryPath("a"), 10); - FacetRequest clone = fr.clone(); - fr.setDepth(10); - assertEquals("depth should not have been affected in the clone", FacetRequest.DEFAULT_DEPTH, clone.getDepth()); - } - } Index: lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java (revision 1443446) +++ lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java (working copy) @@ -5,11 +5,8 @@ import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.BaseTestTopK; -import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.search.ScoredDocIDs; -import org.apache.lucene.facet.search.ScoredDocIdCollector; -import org.apache.lucene.facet.search.StandardFacetsCollector; +import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.params.FacetSearchParams; @@ -17,7 +14,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -60,7 +56,7 @@ return res; } - protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, + protected abstract StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader, IndexReader indexReader, FacetSearchParams searchParams); /** @@ -79,12 +75,11 @@ // Get all of the documents and run the query, then do different // facet counts and compare to control Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 
Index: lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java	(working copy)
@@ -5,11 +5,8 @@
 
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.search.BaseTestTopK;
-import org.apache.lucene.facet.search.FacetsAccumulator;
 import org.apache.lucene.facet.search.FacetsCollector;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIdCollector;
-import org.apache.lucene.facet.search.StandardFacetsCollector;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
 import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
@@ -17,7 +14,6 @@
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 
@@ -60,7 +56,7 @@
     return res;
   }
 
-  protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
+  protected abstract StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
       IndexReader indexReader, FacetSearchParams searchParams);
 
   /**
@@ -79,12 +75,11 @@
     // Get all of the documents and run the query, then do different
     // facet counts and compare to control
     Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
-    ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
 
     FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip);
     FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
 
-    searcher.search(q, MultiCollector.wrap(docCollector, fc));
+    searcher.search(q, fc);
 
     List<FacetResult> expectedResults = fc.getFacetResults();
 
@@ -95,7 +90,7 @@
     for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
       try {
         // complement with sampling!
-        final Sampler sampler = createSampler(nTrial, docCollector.getScoredDocIDs(), useRandomSampler);
+        final Sampler sampler = createSampler(nTrial, useRandomSampler);
 
         assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
         assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
@@ -124,19 +119,12 @@
 
   private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
       FacetSearchParams samplingSearchParams) {
-    FacetsCollector samplingFC = new StandardFacetsCollector(samplingSearchParams, indexReader, taxoReader) {
-      @Override
-      protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
-          TaxonomyReader taxonomyReader) {
-        FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
-        acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
-        return acc;
-      }
-    };
-    return samplingFC;
+    StandardFacetsAccumulator sfa = getSamplingAccumulator(sampler, taxoReader, indexReader, samplingSearchParams);
+    sfa.setComplementThreshold(complement ? StandardFacetsAccumulator.FORCE_COMPLEMENT : StandardFacetsAccumulator.DISABLE_COMPLEMENT);
+    return FacetsCollector.create(sfa);
   }
 
-  private Sampler createSampler(int nTrial, ScoredDocIDs scoredDocIDs, boolean useRandomSampler) {
+  private Sampler createSampler(int nTrial, boolean useRandomSampler) {
     SamplingParams samplingParams = new SamplingParams();
 
     final double retryFactor = Math.pow(1.01, nTrial);
@@ -149,7 +137,6 @@
     Sampler sampler = useRandomSampler ? 
         new RandomSampler(samplingParams, new Random(random().nextLong())) :
         new RepeatableSampler(samplingParams);
-    assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
     return sampler;
   }
 
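With FacetsCollector.create() in place, the ScoredDocIdCollector / MultiCollector.wrap() plumbing removed above becomes unnecessary: the collector gathers the matching documents itself. The default path, as the updated BaseSampleTestTopK now exercises it, reduces to the sketch below (the CountFacetRequest/CategoryPath setup is illustrative; indexReader, taxoReader, searcher and q are assumed to exist):

  // No custom accumulator needed: create() builds the default one internally.
  FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("a"), 10));
  FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
  searcher.search(q, fc);
  List<FacetResult> expectedResults = fc.getFacetResults();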
Index: lucene/facet/src/test/org/apache/lucene/facet/search/sampling/OversampleWithDepthTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/sampling/OversampleWithDepthTest.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/sampling/OversampleWithDepthTest.java	(working copy)
@@ -7,9 +7,8 @@
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.index.FacetFields;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
-import org.apache.lucene.facet.search.FacetsAccumulator;
 import org.apache.lucene.facet.search.FacetsCollector;
-import org.apache.lucene.facet.search.StandardFacetsCollector;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
 import org.apache.lucene.facet.search.params.CountFacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
@@ -112,14 +111,9 @@
   private FacetResult searchWithFacets(IndexReader r, TaxonomyReader tr, FacetSearchParams fsp,
       final SamplingParams params) throws IOException {
     // a FacetsCollector with a sampling accumulator
-    FacetsCollector fcWithSampling = new StandardFacetsCollector(fsp, r, tr) {
-      @Override
-      protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
-          TaxonomyReader taxonomyReader) {
-        Sampler sampler = new RandomSampler(params, random());
-        return new SamplingAccumulator(sampler, facetSearchParams, indexReader, taxonomyReader);
-      }
-    };
+    Sampler sampler = new RandomSampler(params, random());
+    StandardFacetsAccumulator sfa = new SamplingAccumulator(sampler, fsp, r, tr);
+    FacetsCollector fcWithSampling = FacetsCollector.create(sfa);
 
     IndexSearcher s = new IndexSearcher(r);
     s.search(new MatchAllDocsQuery(), fcWithSampling);
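The same wiring accepts either sampler implementation: OversampleWithDepthTest uses a RandomSampler, while BaseSampleTestTopK's createSampler() may also return a RepeatableSampler. A sketch of the repeatable variant, under the assumption that SamplingParams defaults suffice (the tests tune them; fsp, indexReader and taxoReader are placeholders):

  // Mirrors the non-random branch of createSampler() in BaseSampleTestTopK.
  SamplingParams samplingParams = new SamplingParams();
  Sampler sampler = new RepeatableSampler(samplingParams);
  // SamplingAccumulator is a StandardFacetsAccumulator, so it plugs into
  // FacetsCollector.create() like any other accumulator.
  StandardFacetsAccumulator sfa = new SamplingAccumulator(sampler, fsp, indexReader, taxoReader);
  FacetsCollector fc = FacetsCollector.create(sfa);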
Index: lucene/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/sampling/SamplingAccumulatorTest.java	(working copy)
@@ -1,12 +1,11 @@
 package org.apache.lucene.facet.search.sampling;
 
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.params.FacetSearchParams;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.LuceneTestCase.Slow;
 
-import org.apache.lucene.facet.search.FacetsAccumulator;
-import org.apache.lucene.facet.search.params.FacetSearchParams;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -28,7 +27,7 @@
 public class SamplingAccumulatorTest extends BaseSampleTestTopK {
 
   @Override
-  protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
+  protected StandardFacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
       IndexReader indexReader, FacetSearchParams searchParams) {
     return new SamplingAccumulator(sampler, searchParams, indexReader, taxoReader);
   }
Index: lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java	(working copy)
@@ -12,19 +12,13 @@
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-import org.apache.lucene.facet.search.ScoredDocIdCollector;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.junit.Test;
 
@@ -101,79 +95,6 @@
     }
   }
 
-  @Test
-  public void testWithDeletions() throws Exception {
-    int N_DOCS = 100;
-
-    DocumentFactory docFactory = new DocumentFactory(N_DOCS) {
-      @Override
-      public boolean markedDeleted(int docNum) {
-        return (docNum % 3 == 0 || // every 3rd documents, including first
-            docNum == numDocs - 1 || // last document
-            docNum == numDocs / 2 || // 3 consecutive documents in the middle
-            docNum == 1 + numDocs / 2 ||
-            docNum == 2 + numDocs / 2);
-      }
-
-      // every 6th document (starting from the 2nd) would contain 'alpha'
-      @Override
-      public boolean haveAlpha(int docNum) {
-        return (docNum % 6 == 1);
-      }
-    };
-
-    Directory dir = newDirectory();
-    IndexReader reader = createReaderWithNDocs(random(), N_DOCS, docFactory, dir);
-    try {
-      ScoredDocIDs allDocs = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);
-      ScoredDocIDsIterator it = allDocs.iterator();
-      int numIteratedDocs = 0;
-      while (it.next()) {
-        numIteratedDocs++;
-        int docNum = it.getDocID();
-        assertNull(
-            "Deleted docs must not appear in the allDocsScoredDocIds set: " + docNum,
-            reader.document(docNum).getField("del"));
-      }
-
-      assertEquals("Wrong number of (live) documents", allDocs.size(), numIteratedDocs);
-
-      // Get all 'alpha' documents
-      ScoredDocIdCollector collector = ScoredDocIdCollector.create(reader.maxDoc(), false);
-      Query q = new TermQuery(new Term(DocumentFactory.field, DocumentFactory.alphaTxt));
-      IndexSearcher searcher = newSearcher(reader);
-      searcher.search(q, collector);
-
-      ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
-      FixedBitSet resultSet = (FixedBitSet) scoredDocIds.getDocIDs();
-
-      // Getting the complement set of the query result
-      ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
-
-      assertEquals("Number of documents in complement set mismatch",
-          reader.numDocs() - scoredDocIds.size(), complementSet.size());
-
-      // now make sure the documents in the complement set are not deleted
-      // and not in the original result set
-      ScoredDocIDsIterator compIterator = complementSet.iterator();
-      Bits live = MultiFields.getLiveDocs(reader);
-      while (compIterator.next()) {
-        int docNum = compIterator.getDocID();
-        assertFalse(
-            "Complement-Set must not contain deleted documents (doc="+docNum+")",
-            live != null && !live.get(docNum));
-        assertNull("Complement-Set must not contain docs from the original set (doc="+ docNum+")",
-            reader.document(docNum).getField("del"));
-        assertFalse(
-            "Complement-Set must not contain docs from the original set (doc="+docNum+")",
-            resultSet.get(docNum));
-      }
-    } finally {
-      reader.close();
-      dir.close();
-    }
-  }
-
   /**
    * Creates an index with n documents, this method is meant for testing purposes ONLY
    */
@@ -189,10 +110,7 @@
     private final static Field deletionMark = new StringField(field, delTxt, Field.Store.NO);
     private final static Field alphaContent = new StringField(field, alphaTxt, Field.Store.NO);
 
-    protected final int numDocs;
-
     public DocumentFactory(int totalNumDocs) {
-      this.numDocs = totalNumDocs;
     }
 
     public boolean markedDeleted(int docNum) {
Index: lucene/facet/src/test/org/apache/lucene/util/collections/ObjectToIntMapTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/util/collections/ObjectToIntMapTest.java	(revision 1443446)
+++ lucene/facet/src/test/org/apache/lucene/util/collections/ObjectToIntMapTest.java	(working copy)
@@ -4,13 +4,9 @@
 import java.util.Iterator;
 import java.util.Random;
 
+import org.apache.lucene.facet.FacetTestCase;
 import org.junit.Test;
 
-import org.apache.lucene.facet.FacetTestCase;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.collections.IntIterator;
-import org.apache.lucene.util.collections.ObjectToIntMap;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with