Index: lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java (working copy) @@ -0,0 +1,134 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.Map; +import java.util.WeakHashMap; + +import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat; +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which updates categories values by counting their + * occurrences in matching documents. 
This aggregator caches the ordinals of + * each document in-memory, in two parallel {@code int[]} arrays, and therefore + * consumes more RAM than if they were kept "compressed" in-memory, however + * achieves significant performance improvements. If you use this aggregator, + * make sure you have enough RAM to cache the ordinals, and that you set your + * heap size accordingly. + * + *

+ * NOTE: this aggregator is limited to roughly 2.1B total categories + * found in the documents of a single segment. If that is a limitation for you, + * then consider limiting the segment size to fewer documents, or write a + * different aggregator which pages through the categories in the segment. + * + *

+ * NOTE: if you are using this aggregator, it is advised to use + * {@link DiskDocValuesFormat} for the category lists fields, or otherwise + * you'll be doing double-caching. + */ +public class CachedIntsCountingFacetsAggregator extends IntRollupFacetsAggregator { + + private static final class CachedInts { + + final int[] offsets; + final int[] ordinals; + + public CachedInts(BinaryDocValues dv, int maxDoc) { + final BytesRef buf = new BytesRef(); + + offsets = new int[maxDoc + 1]; + int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size + + // this aggregator is limited to Integer.MAX_VALUE total ordinals. + int totOrds = 0; + for (int docID = 0; docID < maxDoc; docID++) { + offsets[docID] = totOrds; + dv.get(docID, buf); + if (buf.length > 0) { + // this document has facets + int upto = buf.offset + buf.length; + int ord = 0; + int offset = buf.offset; + int prev = 0; + while (offset < upto) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + prev = ord = ((ord << 7) | b) + prev; + if (totOrds == ords.length) { + ords = ArrayUtil.grow(ords, 1 + totOrds); + } + ords[totOrds] = ord; + totOrds++; + ord = 0; + } else { + ord = (ord << 7) | (b & 0x7F); + } + } + } + } + offsets[maxDoc] = totOrds; + + // if ords array is bigger by more than 10% of what we really need, shrink it + if ((ords.length / (double) totOrds) - 1.0 > 0.1) { + this.ordinals = new int[totOrds]; + System.arraycopy(ords, 0, this.ordinals, 0, totOrds); + } else { + this.ordinals = ords; + } + } + } + + private static final Map intsCache = new WeakHashMap(); + + private static synchronized CachedInts getCachedInts(BinaryDocValues dv, int maxDoc) { + CachedInts ci = intsCache.get(dv); + if (ci == null) { + ci = new CachedInts(dv, maxDoc); + intsCache.put(dv, ci); + } + return ci; + } + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + BinaryDocValues dv = 
matchingDocs.context.reader().getBinaryDocValues(clp.field); + if (dv == null) { + return; // no categories for this reader + } + final int[] counts = facetArrays.getIntArray(); + final CachedInts ci = getCachedInts(dv, matchingDocs.context.reader().maxDoc()); + int doc = 0; + int length = matchingDocs.bits.length(); + while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + int start = ci.offsets[doc]; + int end = ci.offsets[doc + 1]; + for (int i = start; i < end; i++) { + ++counts[ci.ordinals[i]]; + } + ++doc; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (revision 1444748) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (working copy) @@ -4,7 +4,6 @@ import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.util.IntsRef; /* @@ -33,7 +32,7 @@ * * @lucene.experimental */ -public class CountingFacetsAggregator implements FacetsAggregator { +public class CountingFacetsAggregator extends IntRollupFacetsAggregator { private final IntsRef ordinals = new IntsRef(32); @@ -57,27 +56,4 @@ } } - private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { - int count = 0; - while (ordinal != TaxonomyReader.INVALID_ORDINAL) { - int childCount = counts[ordinal]; - childCount += rollupCounts(children[ordinal], children, siblings, counts); - counts[ordinal] = childCount; - count += 
childCount; - ordinal = siblings[ordinal]; - } - return count; - } - - @Override - public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { - final int[] counts = facetArrays.getIntArray(); - counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); - } - - @Override - public final boolean requiresDocScores() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 1444748) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy) @@ -7,7 +7,6 @@ import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -37,7 +36,7 @@ * * @lucene.experimental */ -public final class FastCountingFacetsAggregator implements FacetsAggregator { +public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregator { private final BytesRef buf = new BytesRef(32); @@ -95,27 +94,4 @@ } } - private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { - int count = 0; - while (ordinal != TaxonomyReader.INVALID_ORDINAL) { - int childCount = counts[ordinal]; - childCount += rollupCounts(children[ordinal], children, siblings, counts); - counts[ordinal] = childCount; - count += childCount; - ordinal = siblings[ordinal]; - } - return count; - } - - @Override - public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { - final int[] counts = facetArrays.getIntArray(); - 
counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); - } - - @Override - public final boolean requiresDocScores() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (working copy) @@ -0,0 +1,64 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which implements + * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} by + * summing the values from {@link FacetArrays#getIntArray()}. + * {@link #aggregate(MatchingDocs, CategoryListParams, FacetArrays)} is left + * abstract for extending classes to implement. 
Also, + * {@link #requiresDocScores()} always returns false. + * + * @lucene.experimental + */ +public abstract class IntRollupFacetsAggregator implements FacetsAggregator { + + @Override + public abstract void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException; + + private int rollupValues(int ordinal, int[] children, int[] siblings, int[] values) { + int value = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int childValue = values[ordinal]; + childValue += rollupValues(children[ordinal], children, siblings, values); + values[ordinal] = childValue; + value += childValue; + ordinal = siblings[ordinal]; + } + return value; + } + + @Override + public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + final int[] values = facetArrays.getIntArray(); + values[ordinal] += rollupValues(children[ordinal], children, siblings, values); + } + + @Override + public final boolean requiresDocScores() { + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (revision 1444748) +++ lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (working copy) @@ -270,7 +270,15 @@ } private FacetsAccumulator randomAccumulator(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { - final FacetsAggregator aggregator = random().nextBoolean() ? 
new CountingFacetsAggregator() : new FastCountingFacetsAggregator(); + final FacetsAggregator aggregator; + double val = random().nextDouble(); + if (val < 0.6) { + aggregator = new FastCountingFacetsAggregator(); // it's the default, so give it the highest chance + } else if (val < 0.8) { + aggregator = new CountingFacetsAggregator(); + } else { + aggregator = new CachedIntsCountingFacetsAggregator(); + } return new FacetsAccumulator(fsp, indexReader, taxoReader) { @Override public FacetsAggregator getAggregator() {