diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java index 4f52d57..cc40f2c 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/FacetsCollector.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredQuery; @@ -38,7 +39,8 @@ import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.util.OpenBitSetIterator; /** Collects hits for subsequent faceting. Once you've run * a search and collect hits into this, instantiate one of @@ -50,11 +52,11 @@ public final class FacetsCollector extends Collector { private AtomicReaderContext context; private Scorer scorer; - private FixedBitSet bits; private int totalHits; private float[] scores; private final boolean keepScores; private final List matchingDocs = new ArrayList(); + private MutableDocIdSet bits; /** * Holds the documents that were matched in the {@link AtomicReaderContext}. @@ -66,7 +68,7 @@ public final class FacetsCollector extends Collector { public final AtomicReaderContext context; /** Which documents were seen. */ - public final FixedBitSet bits; + public final MutableDocIdSet bits; /** Non-sparse scores array. */ public final float[] scores; @@ -75,7 +77,7 @@ public final class FacetsCollector extends Collector { public final int totalHits; /** Sole constructor. 
*/ - public MatchingDocs(AtomicReaderContext context, FixedBitSet bits, int totalHits, float[] scores) { + public MatchingDocs(AtomicReaderContext context, MutableDocIdSet bits, int totalHits, float[] scores) { this.context = context; this.bits = bits; this.scores = scores; @@ -93,6 +95,24 @@ public final class FacetsCollector extends Collector { public FacetsCollector(boolean keepScores) { this.keepScores = keepScores; } + + /** + * Creates a {@link MutableDocIdSet} to record hits; the returned set stores added doc ids in an {@link OpenBitSet} constructed with {@code maxDoc} and iterates them with an {@link OpenBitSetIterator}. + */ + public MutableDocIdSet createHitsSet(final int maxDoc) { + return new MutableDocIdSet() { + private final OpenBitSet bits = new OpenBitSet(maxDoc); + @Override + public DocIdSetIterator iterator() throws IOException { + return new OpenBitSetIterator(bits); + } + + @Override + public void addDoc(int docId) throws IOException { + bits.set(docId); + } + }; + } /** True if scores were saved. */ public boolean getKeepScores() { @@ -124,7 +144,7 @@ public final class FacetsCollector extends Collector { @Override public final void collect(int doc) throws IOException { - bits.set(doc); + bits.addDoc(doc); if (keepScores) { if (totalHits >= scores.length) { float[] newScores = new float[ArrayUtil.oversize(totalHits + 1, 4)]; @@ -146,7 +166,7 @@ public final class FacetsCollector extends Collector { if (bits != null) { matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores)); } - bits = new FixedBitSet(context.reader().maxDoc()); + bits = createHitsSet(context.reader().maxDoc()); totalHits = 0; if (keepScores) { scores = new float[64]; // some initial size diff --git a/lucene/facet/src/java/org/apache/lucene/facet/MutableDocIdSet.java b/lucene/facet/src/java/org/apache/lucene/facet/MutableDocIdSet.java new file mode 100644 index 0000000..2539496 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/MutableDocIdSet.java @@ -0,0 +1,13 @@ +package org.apache.lucene.facet; + +import java.io.IOException; + +import org.apache.lucene.search.DocIdSet; + +/** + * Mutable 
{@link org.apache.lucene.search.DocIdSet} + * to which document ids are added one at a time through {@link #addDoc(int)}. + */ +public abstract class MutableDocIdSet extends DocIdSet { + public abstract void addDoc(int docId) throws IOException; +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java index dac45d0..21108e9 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRangeFacetCounts.java @@ -30,6 +30,7 @@ import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.NumericUtils; /** {@link Facets} implementation that computes counts for @@ -80,18 +81,19 @@ public class DoubleRangeFacetCounts extends RangeFacetCounts { int missingCount = 0; for (MatchingDocs hits : matchingDocs) { - FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context); - final int length = hits.bits.length(); - int doc = 0; + FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context); + totCount += hits.totalHits; - while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) { + DocIdSetIterator docs = hits.bits.iterator(); + + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { // Skip missing docs: if (fv.exists(doc)) { counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc))); } else { missingCount++; } - doc++; } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java index a0c807d..2244137 100644 --- 
a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRangeFacetCounts.java @@ -27,6 +27,7 @@ import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.search.DocIdSetIterator; /** {@link Facets} implementation that computes counts for * dynamic long ranges from a provided {@link ValueSource}, @@ -62,18 +63,17 @@ public class LongRangeFacetCounts extends RangeFacetCounts { int missingCount = 0; for (MatchingDocs hits : matchingDocs) { FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context); - final int length = hits.bits.length(); - int doc = 0; + totCount += hits.totalHits; - while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) { + DocIdSetIterator docs = hits.bits.iterator(); + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { // Skip missing docs: if (fv.exists(doc)) { counter.add(fv.longVal(doc)); } else { missingCount++; } - - doc++; } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index a8fcfc6..442d668 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; /** Compute facets counts from 
previously @@ -176,8 +177,8 @@ public class SortedSetDocValuesFacetCounts extends Facets { } final int maxDoc = reader.maxDoc(); - assert maxDoc == hits.bits.length(); - //System.out.println(" dv=" + dv); + + DocIdSetIterator docs = hits.bits.iterator(); // TODO: yet another option is to count all segs // first, only in seg-ord space, and then do a @@ -195,9 +196,9 @@ public class SortedSetDocValuesFacetCounts extends Facets { if (hits.totalHits < numSegOrds/10) { //System.out.println(" remap as-we-go"); - // Remap every ord to global ord as we iterate: - int doc = 0; - while (doc < maxDoc && (doc = hits.bits.nextSetBit(doc)) != -1) { + // Remap every ord to global ord as we iterate: + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { //System.out.println(" doc=" + doc); segValues.setDocument(doc); int term = (int) segValues.nextOrd(); @@ -206,15 +207,14 @@ public class SortedSetDocValuesFacetCounts extends Facets { counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++; term = (int) segValues.nextOrd(); } - ++doc; } } else { //System.out.println(" count in seg ord first"); // First count in seg-ord space: final int[] segCounts = new int[numSegOrds]; - int doc = 0; - while (doc < maxDoc && (doc = hits.bits.nextSetBit(doc)) != -1) { + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { //System.out.println(" doc=" + doc); segValues.setDocument(doc); int term = (int) segValues.nextOrd(); @@ -223,7 +223,6 @@ public class SortedSetDocValuesFacetCounts extends Facets { segCounts[term]++; term = (int) segValues.nextOrd(); } - ++doc; } // Then, migrate to global ords: @@ -237,10 +236,9 @@ public class SortedSetDocValuesFacetCounts extends Facets { } } else { // No ord mapping (e.g., single segment index): - // just aggregate directly into counts: - - int doc = 0; - while (doc < maxDoc && (doc = hits.bits.nextSetBit(doc)) != -1) { + // just aggregate directly into counts: + int doc; + while ((doc = docs.nextDoc()) != 
DocIdSetIterator.NO_MORE_DOCS) { segValues.setDocument(doc); int term = (int) segValues.nextOrd(); while (term != SortedSetDocValues.NO_MORE_ORDS) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java index bfe276d..7b3df31 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java @@ -21,11 +21,11 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FixedBitSet; /** Computes facets counts, assuming the default encoding * into DocValues was used. 
@@ -55,12 +55,12 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { if (dv == null) { // this reader does not have DocValues for the requested category list continue; } - FixedBitSet bits = hits.bits; - - final int length = hits.bits.length(); - int doc = 0; + BytesRef scratch = new BytesRef(); - while (doc < length && (doc = bits.nextSetBit(doc)) != -1) { + DocIdSetIterator docs = hits.bits.iterator(); + + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { dv.get(doc, scratch); byte[] bytes = scratch.bytes; int end = scratch.offset + scratch.length; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java index 7eae584..06f2d2b 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyFacetCounts.java @@ -24,7 +24,7 @@ import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.IntsRef; /** Reads from any {@link OrdinalsReader}; use {@link @@ -49,11 +49,10 @@ public class TaxonomyFacetCounts extends IntTaxonomyFacets { IntsRef scratch = new IntsRef(); for(MatchingDocs hits : matchingDocs) { OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context); - FixedBitSet bits = hits.bits; - - final int length = hits.bits.length(); - int doc = 0; - while (doc < length && (doc = bits.nextSetBit(doc)) != -1) { + DocIdSetIterator docs = hits.bits.iterator(); + + int doc; + while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { ords.get(doc, scratch); for(int i=0;i