Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1445194) +++ lucene/CHANGES.txt (working copy) @@ -159,6 +159,11 @@ to Lucene42DocValuesConsumer) if you want to make this tradeoff. (Adrien Grand, Robert Muir) +* LUCENE-4769: Added OrdinalsCache and CachedOrdsCountingFacetsAggregator + which uses the cache to obtain a document's ordinals. This aggregator + is faster than others, however consumes much more RAM. + (Michael McCandless, Shai Erera) + API Changes * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera) Index: lucene/facet/src/java/org/apache/lucene/facet/encoding/IntDecoder.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/encoding/IntDecoder.java (revision 1445194) +++ lucene/facet/src/java/org/apache/lucene/facet/encoding/IntDecoder.java (working copy) @@ -29,7 +29,8 @@ /** * Decodes the values from the buffer into the given {@link IntsRef}. Note - * that {@code values.offset} and {@code values.length} are set to 0. + * that {@code values.offset} is set to 0, and {@code values.length} is + * updated to denote the number of decoded values. 
*/ public abstract void decode(BytesRef buf, IntsRef values); Index: lucene/facet/src/java/org/apache/lucene/facet/search/CachedOrdsCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CachedOrdsCountingFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CachedOrdsCountingFacetsAggregator.java (working copy) @@ -0,0 +1,54 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.OrdinalsCache.CachedOrds; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAggregator} which updates categories values by counting their + * occurrences in matching documents. Uses {@link OrdinalsCache} to obtain the + * category ordinals of each segment. 
+ * + * @lucene.experimental + */ +public class CachedOrdsCountingFacetsAggregator extends IntRollupFacetsAggregator { + + @Override + public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { + final CachedOrds ords = OrdinalsCache.getCachedOrds(matchingDocs.context, clp); + if (ords == null) { + return; // this segment has no ordinals for the given category list + } + final int[] counts = facetArrays.getIntArray(); + int doc = 0; + int length = matchingDocs.bits.length(); + while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + int start = ords.offsets[doc]; + int end = ords.offsets[doc + 1]; + for (int i = start; i < end; i++) { + ++counts[ords.ordinals[i]]; + } + ++doc; + } + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/CachedOrdsCountingFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (revision 1445194) +++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (working copy) @@ -4,7 +4,6 @@ import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.util.IntsRef; /* @@ -33,7 +32,7 @@ * * @lucene.experimental */ -public class CountingFacetsAggregator implements FacetsAggregator { +public class CountingFacetsAggregator extends IntRollupFacetsAggregator { private final IntsRef ordinals = new IntsRef(32); @@ -57,27 +56,4 @@ } } - private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { - int 
count = 0; - while (ordinal != TaxonomyReader.INVALID_ORDINAL) { - int childCount = counts[ordinal]; - childCount += rollupCounts(children[ordinal], children, siblings, counts); - counts[ordinal] = childCount; - count += childCount; - ordinal = siblings[ordinal]; - } - return count; - } - - @Override - public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { - final int[] counts = facetArrays.getIntArray(); - counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); - } - - @Override - public final boolean requiresDocScores() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (revision 1445194) +++ lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (working copy) @@ -7,7 +7,6 @@ import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -37,7 +36,7 @@ * * @lucene.experimental */ -public final class FastCountingFacetsAggregator implements FacetsAggregator { +public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregator { private final BytesRef buf = new BytesRef(32); @@ -95,27 +94,4 @@ } } - private int rollupCounts(int ordinal, int[] children, int[] siblings, int[] counts) { - int count = 0; - while (ordinal != TaxonomyReader.INVALID_ORDINAL) { - int childCount = counts[ordinal]; - childCount += rollupCounts(children[ordinal], children, siblings, counts); - counts[ordinal] = childCount; - count += childCount; - ordinal = siblings[ordinal]; - 
} - return count; - } - - @Override - public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { - final int[] counts = facetArrays.getIntArray(); - counts[ordinal] += rollupCounts(children[ordinal], children, siblings, counts); - } - - @Override - public final boolean requiresDocScores() { - return false; - } - } Index: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java (working copy) @@ -0,0 +1,63 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * A {@link FacetsAggregator} which implements + * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} by + * summing the values from {@link FacetArrays#getIntArray()}. Extending classes + * should only implement {@link #aggregate}. Also, {@link #requiresDocScores()} + * always returns false. + * + * @lucene.experimental + */ +public abstract class IntRollupFacetsAggregator implements FacetsAggregator { + + @Override + public abstract void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException; + + private int rollupValues(int ordinal, int[] children, int[] siblings, int[] values) { + int value = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int childValue = values[ordinal]; + childValue += rollupValues(children[ordinal], children, siblings, values); + values[ordinal] = childValue; + value += childValue; + ordinal = siblings[ordinal]; + } + return value; + } + + @Override + public final void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) { + final int[] values = facetArrays.getIntArray(); + values[ordinal] += rollupValues(children[ordinal], children, siblings, values); + } + + @Override + public final boolean requiresDocScores() { + return false; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/facet/search/IntRollupFacetsAggregator.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/facet/search/OrdinalsCache.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/search/OrdinalsCache.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/facet/search/OrdinalsCache.java (working copy) @@ -0,0 +1,118 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import 
java.util.Map; +import java.util.WeakHashMap; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.facet.encoding.IntDecoder; +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A per-segment cache of documents' category ordinals. Every {@link CachedOrds} + * holds the ordinals in a raw {@code int[]}, and therefore consumes as much RAM + * as the total number of ordinals found in the segment. + * + *
+ * NOTE: every {@link CachedOrds} is limited to 2.1B total ordinals. If + * that is a limitation for you then consider limiting the segment size to fewer + * documents, or use an alternative cache which pages through the category + * ordinals. + * + *
+ * NOTE: when using this cache, it is advised to use a
+ * {@link DocValuesFormat} that does not cache the data in memory, at least for
+ * the category list fields; otherwise the data is cached twice.
+ */
+public class OrdinalsCache {
+
+ /** Holds the cached ordinals in two parallel {@code int[]} arrays. */
+ public static final class CachedOrds {
+
+ public final int[] offsets; // ordinals of doc i are ordinals[offsets[i]] .. ordinals[offsets[i + 1] - 1]
+ public final int[] ordinals; // the decoded ordinals of all documents, concatenated in docID order
+
+ /**
+ * Creates a new {@link CachedOrds} by decoding the {@link BinaryDocValues} of all
+ * {@code maxDoc} documents. Assumes that {@code dv} is not {@code null}.
+ */
+ public CachedOrds(BinaryDocValues dv, int maxDoc, CategoryListParams clp) {
+ final BytesRef buf = new BytesRef();
+
+ offsets = new int[maxDoc + 1];
+ int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size
+
+ // this cache is limited to Integer.MAX_VALUE total ordinals (totOrds is an int).
+ int totOrds = 0;
+ final IntDecoder decoder = clp.createEncoder().createMatchingDecoder();
+ final IntsRef values = new IntsRef(32);
+ for (int docID = 0; docID < maxDoc; docID++) {
+ offsets[docID] = totOrds;
+ dv.get(docID, buf);
+ if (buf.length > 0) {
+ // this document has facets; decode them and append to the end of ords
+ decoder.decode(buf, values);
+ if (totOrds + values.length >= ords.length) {
+ ords = ArrayUtil.grow(ords, totOrds + values.length + 1);
+ }
+ for (int i = 0; i < values.length; i++) {
+ ords[totOrds++] = values.ints[i];
+ }
+ }
+ }
+ offsets[maxDoc] = totOrds;
+
+ // if less than 90% of the ords array is actually used, shrink it to the exact size
+ if ((double) totOrds / ords.length < 0.9) {
+ this.ordinals = new int[totOrds];
+ System.arraycopy(ords, 0, this.ordinals, 0, totOrds);
+ } else {
+ this.ordinals = ords;
+ }
+ }
+ }
+
+ private static final Map