Index: lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java	(revision 1437012)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java	(working copy)
@@ -6,28 +6,30 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map.Entry;
+import java.util.Map;
 
 import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.search.params.CountFacetRequest;
-import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
 import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
+import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValues.Source;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
+import org.apache.lucene.util.packed.PackedInts;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -89,11 +91,93 @@
   private final int[] counts;
   private final String facetsField;
   private final boolean useDirectSource;
-  private final HashMap<Source,FixedBitSet> matchingDocs = new HashMap<Source,FixedBitSet>();
+  private final HashMap<CachedInts,FixedBitSet> matchingDocs = new HashMap<CachedInts,FixedBitSet>();
 
   private DocValues facetsValues;
   private FixedBitSet bits;
-  
+
+  private static class CachedInts {
+    final PackedInts.Reader docOffset;
+    final PackedInts.Reader values;
+
+    public CachedInts(Source source, int maxDoc) {
+      //System.out.println("build cache maxDoc=" + maxDoc);
+      BytesRef buf = new BytesRef();
+
+      // First pass: just count how many ords we have:
+
+      // nocommit this could require long?:
+      int totOrds = 0;
+      int maxOrd = -1;
+
+      for(int docID=0;docID<maxDoc;docID++) {
+        source.getBytes(docID, buf);
+        if (buf.length > 0) {
+          // this document has facets
+          int upto = buf.offset + buf.length;
+          int ord = 0;
+          int offset = buf.offset;
+          int prev = 0;
+          while (offset < upto) {
+            byte b = buf.bytes[offset++];
+            if (b >= 0) {
+              prev = ord = ((ord << 7) | b) + prev;
+              maxOrd = Math.max(maxOrd, ord);
+              totOrds++;
+              ord = 0;
+            } else {
+              ord = (ord << 7) | (b & 0x7F);
+            }
+          }
+        }
+      }
+
+      // Second pass: encode to packed ints:
+      PackedInts.Mutable docOffset = PackedInts.getMutable(maxDoc+1, PackedInts.bitsRequired(totOrds), PackedInts.DEFAULT);
+      PackedInts.Mutable values = PackedInts.getMutable(totOrds, PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
+      //System.out.println("  docOffset = " + docOffset.ramBytesUsed() + " bytes");
+      //System.out.println("  values = " + values.ramBytesUsed() + " bytes");
+      totOrds = 0;
+      for(int docID=0;docID<maxDoc;docID++) {
+        docOffset.set(docID, totOrds);
+        source.getBytes(docID, buf);
+        if (buf.length > 0) {
+          // this document has facets
+          int upto = buf.offset + buf.length;
+          int ord = 0;
+          int offset = buf.offset;
+          int prev = 0;
+          while (offset < upto) {
+            byte b = buf.bytes[offset++];
+            if (b >= 0) {
+              prev = ord = ((ord << 7) | b) + prev;
+              values.set(totOrds, ord);
+              totOrds++;
+              ord = 0;
+            } else {
+              ord = (ord << 7) | (b & 0x7F);
+            }
+          }
+        }
+      }
+      docOffset.set(maxDoc, totOrds);
+
+      this.docOffset = docOffset;
+      this.values = values;
+    }
+  }
+
+  private static final Map<Source,CachedInts> sourceCache = new HashMap<Source,CachedInts>();
+
+  private static synchronized CachedInts getCachedInts(Source source, int maxDoc) {
+    CachedInts ci = sourceCache.get(source);
+    if (ci == null) {
+      ci = new CachedInts(source, maxDoc);
+      sourceCache.put(source, ci);
+    }
+    return ci;
+  }
+
   public CountingFacetsCollector(FacetSearchParams fsp, TaxonomyReader taxoReader) {
     this(fsp, taxoReader, new FacetArrays(taxoReader.getSize()), false);
   }
@@ -162,7 +246,7 @@
     if (facetsValues != null) {
       Source facetSource = useDirectSource ? facetsValues.getDirectSource() : facetsValues.getSource();
       bits = new FixedBitSet(context.reader().maxDoc());
-      matchingDocs.put(facetSource, bits);
+      matchingDocs.put(getCachedInts(facetSource, context.reader().maxDoc()), bits);
     }
   }
@@ -176,31 +260,20 @@
   }
 
   private void countFacets() {
-    for (Entry<Source,FixedBitSet> entry : matchingDocs.entrySet()) {
-      Source facetsSource = entry.getKey();
+    for (Entry<CachedInts,FixedBitSet> entry : matchingDocs.entrySet()) {
+      PackedInts.Reader docOffset = entry.getKey().docOffset;
+      PackedInts.Reader values = entry.getKey().values;
       FixedBitSet bits = entry.getValue();
       int doc = 0;
       int length = bits.length();
       while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
-        facetsSource.getBytes(doc, buf);
-        if (buf.length > 0) {
-          // this document has facets
-          int upto = buf.offset + buf.length;
-          int ord = 0;
-          int offset = buf.offset;
-          int prev = 0;
-          while (offset < upto) {
-            byte b = buf.bytes[offset++];
-            if (b >= 0) {
-              prev = ord = ((ord << 7) | b) + prev;
-              counts[ord]++;
-              ord = 0;
-            } else {
-              ord = (ord << 7) | (b & 0x7F);
-            }
-          }
+        int start = (int) docOffset.get(doc);
+        int end = (int) docOffset.get(doc+1);
+        // nocommit use bulk read api:
+        for(int i=start;i<end;i++) {
+          counts[(int) values.get(i)]++;
+        }
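
Note on the approach in this patch: CachedInts trades per-query decoding for a one-time decode per DocValues Source. It walks every document's byte payload once to size two PackedInts arrays, then fills them in CSR style: docOffset holds, for each docID, the start index of that document's ordinals in values (with a sentinel entry at maxDoc), so counting a hit reduces to reading values[docOffset[doc]..docOffset[doc+1]) and incrementing counts. Below is a minimal standalone sketch (not part of the patch) of the dgap+VInt decode loop that CachedInts runs twice; the class name, method name, and use of java.util.List here are illustrative assumptions, not Lucene API:

    import java.util.ArrayList;
    import java.util.List;

    public class DGapVIntDecodeSketch {

      // Decodes dgap+VInt-encoded ordinals from bytes[offset .. offset+length).
      static List<Integer> decodeOrds(byte[] bytes, int offset, int length) {
        List<Integer> ords = new ArrayList<Integer>();
        int upto = offset + length;
        int ord = 0;
        int prev = 0;
        while (offset < upto) {
          byte b = bytes[offset++];
          if (b >= 0) {
            // High bit clear: this byte terminates the VInt; the decoded value
            // is a gap, so add the previous ordinal to recover the absolute ord.
            prev = ord = ((ord << 7) | b) + prev;
            ords.add(ord);
            ord = 0;
          } else {
            // High bit set: accumulate seven more payload bits.
            ord = (ord << 7) | (b & 0x7F);
          }
        }
        return ords;
      }

      public static void main(String[] args) {
        // Ordinals {5, 17, 300} stored as gaps {5, 12, 283};
        // 283 needs a two-byte VInt: 0x82 0x1B.
        byte[] encoded = new byte[] {0x05, 0x0C, (byte) 0x82, 0x1B};
        System.out.println(decodeOrds(encoded, 0, encoded.length)); // prints [5, 17, 300]
      }
    }

Each byte with its high bit set contributes seven payload bits, and a byte with the high bit clear terminates the value, which is a delta (d-gap) added to the previously decoded ordinal; this appears to match the format read by the DGapVInt8IntDecoder imported at the top of the file.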