Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TotalGroupCountCollectorTest.java
===================================================================
--- modules/grouping/src/test/org/apache/lucene/search/grouping/TotalGroupCountCollectorTest.java (revision )
+++ modules/grouping/src/test/org/apache/lucene/search/grouping/TotalGroupCountCollectorTest.java (revision )
@@ -0,0 +1,101 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Verifies that {@link TotalGroupCountCollector} counts each distinct group once, even when the
+ * matching documents are spread over more than one segment.
+ */
+public class TotalGroupCountCollectorTest extends LuceneTestCase {
+
+  private static final String GROUP_FIELD = "author";
+
+  public void testTotalGroupCount() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(
+        random,
+        dir,
+        newIndexWriterConfig(TEST_VERSION_CURRENT,
+            new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+
+    addDocument(w, "author1", "random text", "1");
+    addDocument(w, "author1", "some more random text blob", "2");
+    addDocument(w, "author1", "some more random textual data", "3");
+    w.commit(); // To ensure a second segment
+
+    addDocument(w, "author2", "some random text", "4");
+    addDocument(w, "author3", "some more random text", "5");
+    addDocument(w, "author3", "random blob", "6");
+    // No author field; the assertions below expect this document to count as a group of its own.
+    addDocument(w, null, "random word stuck in alot of other text", "7");
+
+    IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
+    w.close();
+
+    // All seven documents match: author1, author2, author3 and the document without an author.
+    assertGroupCount(4, indexSearcher, "random");
+    // The documents with ids 2 to 5 match: author1, author2 and author3.
+    assertGroupCount(3, indexSearcher, "some");
+    // The documents with ids 2 and 6 match: author1 and author3.
+    assertGroupCount(2, indexSearcher, "blob");
+
+    indexSearcher.getIndexReader().close();
+    dir.close();
+  }
+
+  /**
+   * Indexes one document with stored "content" and "id" fields.
+   *
+   * @param w The writer to add the document to
+   * @param author The value of the group field, or <code>null</code> to leave the group field out
+   * @param content The analyzed text that the test queries run against
+   * @param id The stored-only identifier of the document
+   */
+  private void addDocument(RandomIndexWriter w, String author, String content, String id) throws Exception {
+    Document doc = new Document();
+    if (author != null) {
+      doc.add(new Field(GROUP_FIELD, author, Field.Store.YES, Field.Index.ANALYZED));
+    }
+    doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
+    w.addDocument(doc);
+  }
+
+  /**
+   * Runs a term query against the "content" field and checks the number of groups that were counted.
+   *
+   * @param expected The expected number of groups
+   * @param searcher The searcher to run the query with
+   * @param contentTerm The term that matching documents must contain in the "content" field
+   */
+  private void assertGroupCount(int expected, IndexSearcher searcher, String contentTerm) throws Exception {
+    TotalGroupCountCollector collector = new TotalGroupCountCollector(GROUP_FIELD, searcher.getIndexReader());
+    searcher.search(new TermQuery(new Term("content", contentTerm)), collector);
+    assertEquals(expected, collector.getGroupCount());
+  }
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TotalGroupCountCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TotalGroupCountCollector.java (revision )
+++ 
modules/grouping/src/java/org/apache/lucene/search/grouping/TotalGroupCountCollector.java (revision )
@@ -0,0 +1,130 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * A collector that counts the total number of groups.
+ *
+ * @lucene.experimental
+ */
+public class TotalGroupCountCollector extends Collector {
+
+  private final String groupField;
+  // Ords of the current segment that belong to a group that has already been counted.
+  private final SentinelIntSet ordSet;
+  // The terms of all groups counted so far; used to re-seed ordSet when the segment changes.
+  private final List<BytesRef> countedGroups = new LinkedList<BytesRef>();
+  // Scratch instance handed to binarySearchLookup.
+  private final BytesRef spareBytesRef = new BytesRef();
+
+  private int groupCount;
+  // Terms index of the segment that is currently being collected.
+  private FieldCache.DocTermsIndex index;
+
+  /**
+   * Constructs {@link TotalGroupCountCollector}
+   *
+   * @param groupField The field to group by
+   * @param maxNumberOfUniqueValues The total number of unique values for the group field that can exist for any query.
+   */
+  public TotalGroupCountCollector(String groupField, int maxNumberOfUniqueValues) {
+    this.groupField = groupField;
+    ordSet = new SentinelIntSet(maxNumberOfUniqueValues, -1);
+  }
+
+  /**
+   * Helper constructor.
+   *
+   * @param groupField The field to group by
+   * @param indexReader The reader that is used for determining the total number of unique terms for the groupField
+   * @throws IOException When an i/o error occurred during determining the total number of unique terms for
+   * the groupField
+   */
+  public TotalGroupCountCollector(String groupField, IndexReader indexReader) throws IOException {
+    this(groupField, FieldCache.DEFAULT.getTermsIndex(indexReader, groupField).numOrd());
+  }
+
+  /**
+   * Does nothing; this collector does not use scores.
+   */
+  @Override
+  public void setScorer(Scorer scorer) throws IOException {
+  }
+
+  /**
+   * Counts the group of the given document, unless that group has been counted before.
+   *
+   * @param doc The id of the matching document, relative to the current segment
+   */
+  @Override
+  public void collect(int doc) throws IOException {
+    int key = index.getOrd(doc);
+    if (!ordSet.exists(key)) {
+      groupCount++;
+      ordSet.put(key);
+      // Ords are looked up again for every segment, so remember the term itself for later segments.
+      BytesRef term = index.getTerm(doc, new BytesRef());
+      countedGroups.add(term);
+    }
+  }
+
+  /**
+   * @return The total number of groups for the executed search
+   */
+  public int getGroupCount() {
+    return groupCount;
+  }
+
+  /**
+   * Switches to the terms index of the next segment.
+   *
+   * @param context The context of the segment that is collected next
+   * @throws IOException When an i/o error occurred while loading the terms index
+   */
+  @Override
+  public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+    index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField);
+
+    // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
+    ordSet.clear();
+    for (BytesRef countedGroup : countedGroups) {
+      int ord = index.binarySearchLookup(countedGroup, spareBytesRef);
+      if (ord >= 0) {
+        ordSet.put(ord);
+      }
+    }
+  }
+
+  /**
+   * @return <code>true</code>, documents may be collected in any order
+   */
+  @Override
+  public boolean acceptsDocsOutOfOrder() {
+    return true;
+  }
+}
\ No newline at end of file