Index: lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java (working copy)
@@ -0,0 +1,134 @@
+package org.apache.lucene.facet.search;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.WeakHashMap;
+
+import org.apache.lucene.codecs.diskdv.DiskDocValuesFormat;
+import org.apache.lucene.facet.params.CategoryListParams;
+import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link FacetsAggregator} which updates categories values by counting their
+ * occurrences in matching documents. This aggregator caches the ordinals of
+ * each document in-memory, in two parallel {@code int[]} arrays, and therefore
+ * consumes more RAM than if they were kept "compressed" in-memory, however
+ * achieves significant performance improvements. If you use this aggregator,
+ * make sure you have enough RAM to cache the ordinals, and that you set your
+ * heap size accordingly.
+ *
+ * <p>
+ * <b>NOTE:</b> this aggregator is limited to roughly 2.1B (Integer.MAX_VALUE)
+ * total category ordinals across the documents of a single segment. If that
+ * is a limitation for you, consider limiting segments to fewer documents, or
+ * write a different aggregator which pages through the segment's categories.
+ *
+ * <p>
+ * <b>NOTE:</b> if you are using this aggregator, it is advised to use
+ * {@link DiskDocValuesFormat} for the category list fields, or otherwise
+ * you'll be doing double-caching.
+ */
+public class CachedIntsCountingFacetsAggregator extends IntRollupFacetsAggregator {
+  /** The decoded ordinals of all documents of one reader, held in two parallel arrays. */
+  private static final class CachedInts {
+    // ordinals of document i are ordinals[offsets[i]] (inclusive) to ordinals[offsets[i + 1]] (exclusive)
+    final int[] offsets;
+    final int[] ordinals;
+    /** Decodes the ordinals of documents 0 (inclusive) to maxDoc (exclusive) from dv. */
+    public CachedInts(BinaryDocValues dv, int maxDoc) {
+      final BytesRef buf = new BytesRef();
+
+      offsets = new int[maxDoc + 1];
+      int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size
+
+      // this aggregator is limited to Integer.MAX_VALUE total ordinals.
+      int totOrds = 0;
+      for (int docID = 0; docID < maxDoc; docID++) {
+        offsets[docID] = totOrds;
+        dv.get(docID, buf);
+        if (buf.length > 0) {
+          // this document has facets: 7 bits per byte, high bit set = more bytes follow, each value is a gap from the previous ordinal
+          int upto = buf.offset + buf.length;
+          int ord = 0;
+          int offset = buf.offset;
+          int prev = 0;
+          while (offset < upto) {
+            byte b = buf.bytes[offset++];
+            if (b >= 0) {
+              prev = ord = ((ord << 7) | b) + prev;
+              if (totOrds == ords.length) {
+                ords = ArrayUtil.grow(ords, 1 + totOrds);
+              }
+              ords[totOrds] = ord;
+              totOrds++;
+              ord = 0;
+            } else {
+              ord = (ord << 7) | (b & 0x7F);
+            }
+          }
+        }
+      }
+      offsets[maxDoc] = totOrds;
+
+      // if ords array is bigger by more than 10% of what we really need, shrink it
+      if ((ords.length / (double) totOrds) - 1.0 > 0.1) {
+        this.ordinals = new int[totOrds];
+        System.arraycopy(ords, 0, this.ordinals, 0, totOrds);
+      } else {
+        this.ordinals = ords;
+      }
+    }
+  }
+  // decoded ordinals, weakly keyed by the BinaryDocValues instance they were decoded from
+  private static final Map<BinaryDocValues,CachedInts> intsCache = new WeakHashMap<BinaryDocValues,CachedInts>();
+  // synchronized so that concurrent callers never decode the same BinaryDocValues instance twice
+  private static synchronized CachedInts getCachedInts(BinaryDocValues dv, int maxDoc) {
+    CachedInts ci = intsCache.get(dv);
+    if (ci == null) {
+      ci = new CachedInts(dv, maxDoc);
+      intsCache.put(dv, ci);
+    }
+    return ci;
+  }
+  /** Increments, in the int array of facetArrays, the count of every cached ordinal of each matching document. */
+  @Override
+  public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException {
+    BinaryDocValues dv = matchingDocs.context.reader().getBinaryDocValues(clp.field);
+    if (dv == null) {
+      return; // no categories for this reader
+    }
+    final int[] counts = facetArrays.getIntArray();
+    final CachedInts ci = getCachedInts(dv, matchingDocs.context.reader().maxDoc());
+    int doc = 0;
+    int length = matchingDocs.bits.length();
+    while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
+      int start = ci.offsets[doc];
+      int end = ci.offsets[doc + 1];
+      for (int i = start; i < end; i++) {
+        ++counts[ci.ordinals[i]];
+      }
+      ++doc;
+    }
+  }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CachedIntsCountingFacetsAggregator.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (revision 1444748)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (working copy)
@@ -4,7 +4,6 @@
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.util.IntsRef;
/*
@@ -33,7 +32,7 @@
*
* @lucene.experimental
*/
-public class CountingFacetsAggregator implements FacetsAggregator {
+public class CountingFacetsAggregator extends IntRollupFacetsAggregator {
private final IntsRef ordinals = new IntsRef(32);
@@ -57,27 +56,4 @@
}
}
- private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) {
- int count = 0;
- while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
- int childCount = counts[ordinal];
- childCount += rollupCounts(children[ordinal], children, siblings, counts);
- counts[ordinal] = childCount;
- count += childCount;
- ordinal = siblings[ordinal];
- }
- return count;
- }
-
- @Override
- public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
- final int[] counts = facetArrays.getIntArray();
- counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts);
- }
-
- @Override
- public final boolean requiresDocScores() {
- return false;
- }
-
}
Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 1444748)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy)
@@ -7,7 +7,6 @@
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@@ -37,7 +36,7 @@
*
* @lucene.experimental
*/
-public final class FastCountingFacetsAggregator implements FacetsAggregator {
+public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregator {
private final BytesRef buf = new BytesRef(32);
@@ -95,27 +94,4 @@
}
}
- private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) {
- int count = 0;
- while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
- int childCount = counts[ordinal];
- childCount += rollupCounts(children[ordinal], children, siblings, counts);
- counts[ordinal] = childCount;
- count += childCount;
- ordinal = siblings[ordinal];
- }
- return count;
- }
-
- @Override
- public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
- final int[] counts = facetArrays.getIntArray();
- counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts);
- }
-
- @Override
- public final boolean requiresDocScores() {
- return false;
- }
-
}
Index: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (revision 0)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (working copy)
@@ -0,0 +1,64 @@
+package org.apache.lucene.facet.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.params.CategoryListParams;
+import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A {@link FacetsAggregator} which implements
+ * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} by
+ * summing the values from {@link FacetArrays#getIntArray()}.
+ * {@link #aggregate(MatchingDocs, CategoryListParams, FacetArrays)} is left
+ * abstract for extending classes to implement. Also,
+ * {@link #requiresDocScores()} always returns false.
+ *
+ * @lucene.experimental
+ */
+public abstract class IntRollupFacetsAggregator implements FacetsAggregator {
+
+  @Override
+  public abstract void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException;
+
+  // returns the summed values of 'ordinal', its following siblings and all of their descendants
+  private int rollupValues(int ordinal, int[] children, int[] siblings, int[] values) {
+    int total = 0;
+    // walk the sibling chain; each visited entry is overwritten with the sum of its whole subtree
+    for (int ord = ordinal; ord != TaxonomyReader.INVALID_ORDINAL; ord = siblings[ord]) {
+      final int subtreeValue = values[ord] + rollupValues(children[ord], children, siblings, values);
+      values[ord] = subtreeValue;
+      total += subtreeValue;
+    }
+    return total;
+  }
+
+  @Override
+  public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
+    final int[] intValues = facetArrays.getIntArray();
+    intValues[ordinal] += rollupValues(children[ordinal], children, siblings, intValues);
+  }
+
+  @Override
+  public final boolean requiresDocScores() {
+    return false;
+  }
+
+}
Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java
===================================================================
--- lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (revision 1444748)
+++ lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java (working copy)
@@ -270,7 +270,15 @@
}
private FacetsAccumulator randomAccumulator(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
- final FacetsAggregator aggregator = random().nextBoolean() ? new CountingFacetsAggregator() : new FastCountingFacetsAggregator();
+ final FacetsAggregator aggregator;
+ double val = random().nextDouble();
+ if (val < 0.6) {
+ aggregator = new FastCountingFacetsAggregator(); // it's the default, so give it the highest chance
+ } else if (val < 0.8) {
+ aggregator = new CountingFacetsAggregator();
+ } else {
+ aggregator = new CachedIntsCountingFacetsAggregator();
+ }
return new FacetsAccumulator(fsp, indexReader, taxoReader) {
@Override
public FacetsAggregator getAggregator() {