Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision 1103024)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision )
@@ -21,9 +21,6 @@
/** Represents result returned by a grouping search.
*
- * Note that we do not return the total number of unique
- * groups; doing so would be costly.
- *
* @lucene.experimental */
public class TopGroups {
/** Number of documents matching the search */
@@ -32,6 +29,9 @@
/** Number of documents grouped into the topN groups */
public final int totalGroupedHitCount;
+ /** The total number of unique groups, or <code>null</code> if this value was not computed. */
+ public final Integer totalGroupCount;
+
/** Group results in groupSort order */
public final GroupDocs[] groups;
@@ -47,5 +47,15 @@
this.totalHitCount = totalHitCount;
this.totalGroupedHitCount = totalGroupedHitCount;
this.groups = groups;
+ this.totalGroupCount = null;
}
+
+ public TopGroups(TopGroups oldTopGroups, Integer totalGroupCount) {
+ this.groupSort = oldTopGroups.groupSort;
+ this.withinGroupSort = oldTopGroups.withinGroupSort;
+ this.totalHitCount = oldTopGroups.totalHitCount;
+ this.totalGroupedHitCount = oldTopGroups.totalGroupedHitCount;
+ this.groups = oldTopGroups.groups;
+ this.totalGroupCount = totalGroupCount;
-}
+ }
+}
Index: modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
===================================================================
--- modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision )
+++ modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision )
@@ -0,0 +1,109 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class AllGroupsCollectorTest extends LuceneTestCase {
+ /** Checks the value returned by AllGroupsCollector.getGroupCount() for three term queries on a small index. */
+ public void testTotalGroupCount() throws Exception {
+ // Seven docs are indexed below: three for author1, one for author2, two for author3 and one without an author.
+ final String groupField = "author";
+
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(
+ random,
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ // doc 0: author1; content contains the term random
+ Document doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // doc 1: author1; content contains the terms random, some and blob
+ doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random text blob", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // doc 2: author1; content contains the terms random and some
+ doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random textual data", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+ w.commit(); // Commit here so that the docs added below end up in a second segment
+
+ // doc 3: author2; content contains the terms random and some
+ doc = new Document();
+ doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // doc 4: author3; content contains the terms random and some
+ doc = new Document();
+ doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // doc 5: author3; content contains the terms random and blob
+ doc = new Document();
+ doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "random blob", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // doc 6: no author field; content contains the term random. NOTE(review): id 6 repeats doc 5's id - presumably meant to be 7.
+ doc = new Document();
+ doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
+ w.close();
+
+ AllGroupsCollector c1 = new AllGroupsCollector(groupField);
+ indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
+ assertEquals(4, c1.getGroupCount()); // all seven docs match: author1, author2, author3 and, presumably, one group for the author-less doc
+
+ AllGroupsCollector c2 = new AllGroupsCollector(groupField);
+ indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
+ assertEquals(3, c2.getGroupCount()); // docs 1-4 match: author1, author2 and author3
+
+ AllGroupsCollector c3 = new AllGroupsCollector(groupField);
+ indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
+ assertEquals(2, c3.getGroupCount()); // docs 1 and 5 match: author1 and author3
+
+ indexSearcher.getIndexReader().close();
+ dir.close();
+ }
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/package.html
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/package.html (revision 1103150)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/package.html (revision )
@@ -88,6 +88,13 @@
boolean fillFields = true;
SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ // Optionally compute the total group count
+ AllGroupsCollector allGroupsCollector = null;
+ if (requiredTotalGroupCount) {
+ allGroupsCollector = new AllGroupsCollector("author");
+ c2 = MultiCollector.wrap(c2, allGroupsCollector);
+ }
+
if (cachedCollector.isCached()) {
// Cache fit within maxCacheRAMMB, so we can replay it:
cachedCollector.replay(c2);
@@ -97,6 +104,9 @@
}
TopGroups groupsResult = c2.getTopGroups(docOffset);
+ if (requiredTotalGroupCount) {
+ groupsResult = new TopGroups(groupsResult, allGroupsCollector.getGroupCount());
+ }
// Render groupsResult...
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision )
@@ -0,0 +1,126 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * A collector that collects all groups that match the query. Only the group value is collected, so it cannot
+ * determine the most relevant document of a group.
+ *
getGroups().size()+ * + * @return The total number of groups for the executed search + */ + public int getGroupCount() { + return groups.size(); + } + + /** + * Returns the group values + * + * This is an unordered collections of group values. For each group that matched the query there is a {@link BytesRef} + * representing a group value. + * + * @return the group values + */ + public Collection