Index: lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java (revision 1420479)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java (working copy)
@@ -193,6 +193,11 @@
         : new NonScoringDocIdCollector(maxDoc);
   }
 
+  // nocommit: hands out the internal bit set itself, not a copy
+  public FixedBitSet getDocIDs() {
+    return docIds;
+  }
+
   private ScoredDocIdCollector(int maxDoc) {
     numDocIds = 0;
     docIds = new FixedBitSet(maxDoc);
Index: lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java (revision 1420479)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java (working copy)
@@ -43,6 +43,11 @@
     this.counterArray = counterArray;
   }
 
+  // nocommit: hands out the internal counter array itself, not a copy
+  public int[] getCountArray() {
+    return counterArray;
+  }
+
   @Override
   public boolean equals(Object obj) {
     if (obj == null || obj.getClass() != this.getClass()) {
Index: lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
===================================================================
--- lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (revision 1420479)
+++ lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (working copy)
@@ -8,16 +8,24 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import org.apache.lucene.index.IndexReader;
-
+import org.apache.lucene.facet.index.params.CategoryListParams;
 import org.apache.lucene.facet.search.aggregator.Aggregator;
+import org.apache.lucene.facet.search.aggregator.CountingAggregator;
+import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
-import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.IntermediateFacetResult;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.util.PartitionsUtils;
 import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -237,21 +245,124 @@
 
     for (Entry entry : categoryLists.entrySet()) {
       CategoryListIterator categoryList = entry.getKey();
+      // nocommit
+      /*
       if (!categoryList.init()) {
         continue;
       }
+      */
 
       Aggregator categorator = entry.getValue();
-      ScoredDocIDsIterator iterator = docids.iterator();
-      while (iterator.next()) {
-        int docID = iterator.getDocID();
-        if (!categoryList.skipTo(docID)) {
-          continue;
+
+      // nocommit need more if checks, eg decoder must be
+      // dGap(vInt(payload))
+      if (categorator instanceof CountingAggregator) {
+        FixedBitSet docs = (FixedBitSet) docids.getDocIDs();
+        int[] counts = ((CountingAggregator) categorator).getCountArray();
+
+        int docBase = 0;
+        // Last valid global docID; -1 when the bit set is empty.
+        final int endDoc = docs.length() - 1;
+
+        // nocommit is OpenBitSetIterator faster than just
+        // using nextSetBit...?
+        // FixedBitSet.nextSetBit returns -1 (not NO_MORE_DOCS) when no bit
+        // is left and must not be called past the last index, so every
+        // lookup below is guarded and -1 is mapped to NO_MORE_DOCS.
+        int nextMatchingDoc = endDoc < 0 ? -1 : docs.nextSetBit(0);
+        if (nextMatchingDoc == -1) {
+          nextMatchingDoc = DocsAndPositionsEnum.NO_MORE_DOCS;
+        }
+        for (AtomicReaderContext ctx : indexReader.leaves()) {
+          // Global docID one past this segment; computed up front so that
+          // docBase advances even for segments without the category term.
+          final int maxDoc = docBase + ctx.reader().maxDoc();
+          DocsAndPositionsEnum dpEnum = null;
+          Fields fields = ctx.reader().fields();
+          if (fields != null) {
+            // nocommit use the right term from this CLP:
+            Terms terms = fields.terms(CategoryListParams.DEFAULT_TERM.field());
+            if (terms != null) {
+              TermsEnum termsEnum = terms.iterator(null);
+              if (termsEnum.seekExact(CategoryListParams.DEFAULT_TERM.bytes(), true)) {
+                // May stay null if the field was indexed without positions.
+                dpEnum = termsEnum.docsAndPositions(null,
+                                                    null,
+                                                    DocsAndPositionsEnum.FLAG_PAYLOADS);
+              }
+            }
+          }
+
+          // Segment-local doc the postings enum is positioned on; -1 until
+          // the first advance.
+          int postingsDoc = -1;
+          while (nextMatchingDoc < maxDoc) {
+            // First global docID that can still produce a match.
+            int resumeAt;
+            if (dpEnum == null) {
+              // No category payloads in this segment: skip all its docs.
+              resumeAt = maxDoc;
+            } else {
+              final int target = nextMatchingDoc - docBase;
+              if (postingsDoc < target) {
+                postingsDoc = dpEnum.advance(target);
+              }
+              if (postingsDoc == target) {
+                assert dpEnum.freq() == 1;
+                dpEnum.nextPosition();
+                BytesRef payload = dpEnum.getPayload();
+                assert payload != null;
+
+                // Decode dGap(vInt) ordinals: 7 data bits per byte, high
+                // bit set on all but the last byte of each value.
+                int upto = payload.offset;
+                final int end = payload.offset + payload.length;
+                int ord = 0;
+                int accum = 0;
+                final byte[] bytes = payload.bytes;
+                while (upto < end) {
+                  byte b = bytes[upto++];
+                  accum = (accum << 7) | (b & 0x7F);
+                  if (b >= 0) {
+                    ord += accum;
+                    counts[ord]++;
+                    accum = 0;
+                  }
+                }
+                assert accum == 0;
+                resumeAt = nextMatchingDoc + 1;
+              } else if (postingsDoc == DocsAndPositionsEnum.NO_MORE_DOCS) {
+                // Postings exhausted: nothing more to count in this segment.
+                resumeAt = maxDoc;
+              } else {
+                // Postings overshot the target: resume from the doc they landed on.
+                resumeAt = docBase + postingsDoc;
+              }
+            }
+            if (resumeAt > endDoc) {
+              nextMatchingDoc = DocsAndPositionsEnum.NO_MORE_DOCS;
+            } else {
+              nextMatchingDoc = docs.nextSetBit(resumeAt);
+              if (nextMatchingDoc == -1) {
+                nextMatchingDoc = DocsAndPositionsEnum.NO_MORE_DOCS;
+              }
+            }
+          }
+
+          docBase = maxDoc;
         }
-        categorator.setNextDoc(docID, iterator.getScore());
-        long ordinal;
-        while ((ordinal = categoryList.nextCategory()) <= Integer.MAX_VALUE) {
-          categorator.aggregate((int) ordinal);
+      } else {
+        ScoredDocIDsIterator iterator = docids.iterator();
+        while (iterator.next()) {
+          int docID = iterator.getDocID();
+          if (!categoryList.skipTo(docID)) {
+            continue;
+          }
+          categorator.setNextDoc(docID, iterator.getScore());
+          long ordinal;
+          while ((ordinal = categoryList.nextCategory()) <= Integer.MAX_VALUE) {
+            categorator.aggregate((int) ordinal);
+          }
         }
       }
     }