Index: modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision 1126761) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision ) @@ -17,9 +17,7 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; @@ -35,65 +33,36 @@ * the most relevant document of a group. * *

- * Implementation detail: an int hash set (SentinelIntSet) - * is used to detect if a group is already added to the - * total count. For each segment the int set is cleared and filled - * with previous counted groups that occur in the new - * segment. + * This is an abstract version. Concrete implementations define + * what a group actually is and how it is internally collected. * * @lucene.experimental */ -public class AllGroupsCollector extends Collector { +public abstract class AllGroupsCollector extends Collector { - private static final int DEFAULT_INITIAL_SIZE = 128; + final List groups; - private final String groupField; - private final SentinelIntSet ordSet; - private final List groups; - private final BytesRef spareBytesRef = new BytesRef(); - - private FieldCache.DocTermsIndex index; - - /** - * Expert: Constructs a {@link AllGroupsCollector} - * - * @param groupField The field to group by - * @param initialSize The initial allocation size of the - * internal int set and group list - * which should roughly match the total - * number of expected unique groups. Be aware that the - * heap usage is 4 bytes * initialSize. - */ - public AllGroupsCollector(String groupField, int initialSize) { - this.groupField = groupField; - ordSet = new SentinelIntSet(initialSize, -1); - groups = new ArrayList(initialSize); + public AllGroupsCollector(int initialSize) { + groups = new ArrayList(initialSize); } - /** - * Constructs a {@link AllGroupsCollector}. This sets the - * initial allocation size for the internal int set and group - * list to 128. - * - * @param groupField The field to group by - */ - public AllGroupsCollector(String groupField) { - this(groupField, DEFAULT_INITIAL_SIZE); - } - public void setScorer(Scorer scorer) throws IOException { } public void collect(int doc) throws IOException { - int key = index.getOrd(doc); - if (!ordSet.exists(key)) { - ordSet.put(key); - BytesRef term = key == 0 ? 
null : index.lookup(key, new BytesRef()); - groups.add(term); + addGroupIfNotExists(doc); - } + } - } /** + * Adds a group if not already added. + * + * @param doc The doc that is used to map to a group + */ + //NOTE: We could have two separate methods: existGroup() and addGroup() + // I think for now this works best for the one impl we have. + public abstract void addGroupIfNotExists(int doc); + + /** * Returns the total number of groups for the executed search. * This is a convenience method. The following code snippet has the same effect:

getGroups().size()
* @@ -111,23 +80,10 @@ * * @return the group values */ - public Collection getGroups() { + public Collection getGroups() { return groups; } - public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException { - index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField); - - // Clear ordSet and fill it with previous encountered groups that can occur in the current segment. - ordSet.clear(); - for (BytesRef countedGroup : groups) { - int ord = index.binarySearchLookup(countedGroup, spareBytesRef); - if (ord >= 0) { - ordSet.put(ord); - } - } - } - public boolean acceptsDocsOutOfOrder() { return true; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java (revision ) @@ -0,0 +1,103 @@ +package org.apache.lucene.search.grouping; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * A collector that collects all groups that match the + * query. Only the group value is collected, and the order + * is undefined. This collector does not determine + * the most relevant document of a group. + * + *

+ * Implementation detail: an int hash set (SentinelIntSet) + * is used to detect if a group is already added to the + * total count. For each segment the int set is cleared and filled + * with previously counted groups that occur in the new + * segment. + * + * @lucene.experimental + */ +public class TermsAllGroupsCollector extends AllGroupsCollector { + + private static final int DEFAULT_INITIAL_SIZE = 128; + + private final String groupField; + private final SentinelIntSet ordSet; + + private FieldCache.DocTermsIndex index; + private final BytesRef spareBytesRef = new BytesRef(); + + /** + * Expert: Constructs a {@link TermsAllGroupsCollector} + * + * @param groupField The field to group by + * @param initialSize The initial allocation size of the + * internal int set and group list + * which should roughly match the total + * number of expected unique groups. Be aware that the + * heap usage is 4 bytes * initialSize. + */ + public TermsAllGroupsCollector(String groupField, int initialSize) { + super(initialSize); + ordSet = new SentinelIntSet(initialSize, -1); + this.groupField = groupField; + } + + /** + * Constructs a {@link TermsAllGroupsCollector}. This sets the + * initial allocation size for the internal int set and group + * list to 128. + * + * @param groupField The field to group by + */ + public TermsAllGroupsCollector(String groupField) { + this(groupField, DEFAULT_INITIAL_SIZE); + } + + /** + * {@inheritDoc} + */ + public void addGroupIfNotExists(int doc) { + int key = index.getOrd(doc); + if (!ordSet.exists(key)) { + ordSet.put(key); + BytesRef term = key == 0 ? null : index.lookup(key, new BytesRef()); + groups.add(term); + } + } + + public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException { + index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField); + + // Clear ordSet and fill it with previously encountered groups that can occur in the current segment. 
+ ordSet.clear(); + for (BytesRef countedGroup : groups) { + int ord = index.binarySearchLookup(countedGroup, spareBytesRef); + if (ord >= 0) { + ordSet.put(ord); + } + } + } + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (revision 1103150) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (revision ) @@ -17,49 +17,41 @@ * limitations under the License. */ +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.*; + import java.io.IOException; import java.util.Collection; import java.util.HashMap; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldCollector; -import org.apache.lucene.search.TopScoreDocCollector; -import org.apache.lucene.util.BytesRef; - /** * SecondPassGroupingCollector is the second of two passes * necessary to collect grouped docs. This pass gathers the * top N documents per top group computed from the - * first pass. + * first pass. Concrete subclasses define what a group is and how it + * is internally collected. * *

See {@link org.apache.lucene.search.grouping} for more * details including a full code example.

* * @lucene.experimental */ -public class SecondPassGroupingCollector extends Collector { - private final HashMap groupMap; +public abstract class SecondPassGroupingCollector extends Collector { + protected final HashMap> groupMap; - private FieldCache.DocTermsIndex index; - private final String groupField; + protected final String groupField; private final int maxDocsPerGroup; - private final SentinelIntSet ordSet; - private final SearchGroupDocs[] groupDocs; - private final BytesRef spareBytesRef = new BytesRef(); - private final Collection groups; + // nocommit can we somehow keep this private? + // Note (Martijn): I think we need this protected. Since subclasses need to access it. + protected SearchGroupDocs[] groupDocs; + private final Collection> groups; private final Sort withinGroupSort; private final Sort groupSort; private int totalHitCount; private int totalGroupedHitCount; - public SecondPassGroupingCollector(String groupField, Collection groups, Sort groupSort, Sort withinGroupSort, + public SecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { @@ -74,9 +66,9 @@ this.groupField = groupField; this.maxDocsPerGroup = maxDocsPerGroup; - groupMap = new HashMap(groups.size()); + groupMap = new HashMap>(groups.size()); - for (SearchGroup group : groups) { + for (SearchGroup group : groups) { //System.out.println(" prep group=" + (group.groupValue == null ? 
"null" : group.groupValue.utf8ToString())); final TopDocsCollector collector; if (withinGroupSort == null) { @@ -87,25 +79,21 @@ collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); } groupMap.put(group.groupValue, - new SearchGroupDocs(group.groupValue, + new SearchGroupDocs(group.groupValue, - collector)); + collector)); } - - ordSet = new SentinelIntSet(groupMap.size(), -1); - groupDocs = new SearchGroupDocs[ordSet.keys.length]; } @Override public void setScorer(Scorer scorer) throws IOException { - for (SearchGroupDocs group : groupMap.values()) { + for (SearchGroupDocs group : groupMap.values()) { group.collector.setScorer(scorer); } } @Override public void collect(int doc) throws IOException { - final int slot = ordSet.find(index.getOrd(doc)); - //System.out.println("SP.collect doc=" + doc + " slot=" + slot); + final int slot = getDocSlot(doc); totalHitCount++; if (slot >= 0) { totalGroupedHitCount++; @@ -113,24 +101,23 @@ } } + /** + * Returns the slot the specified doc belongs to. + * + * @param doc The specified doc + * @return the slot the specified doc belongs to. + * @throws IOException If an I/O related error occurred + */ + //NOTE: I wonder how this pans out for fq. Fq don't have ords, so we need to allocate a larger groupDocs array. + protected abstract int getDocSlot(int doc) throws IOException; + @Override public void setNextReader(AtomicReaderContext readerContext) throws IOException { //System.out.println("SP.setNextReader"); - for (SearchGroupDocs group : groupMap.values()) { + for (SearchGroupDocs group : groupMap.values()) { group.collector.setNextReader(readerContext); } - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - - // Rebuild ordSet - ordSet.clear(); - for (SearchGroupDocs group : groupMap.values()) { - //System.out.println(" group=" + (group.groupValue == null ? 
"null" : group.groupValue.utf8ToString())); - int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef); - if (ord >= 0) { - groupDocs[ordSet.put(ord)] = group; - } + } - } - } @Override public boolean acceptsDocsOutOfOrder() { @@ -138,22 +125,23 @@ } public TopGroups getTopGroups(int withinGroupOffset) { - final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()]; + // nocommit suppress + final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()]; int groupIDX = 0; for(SearchGroup group : groups) { - final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); + final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup); - groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), + groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), - topDocs.totalHits, - topDocs.scoreDocs, - groupDocs.groupValue, - group.sortValues); + topDocs.totalHits, + topDocs.scoreDocs, + groupDocs.groupValue, + group.sortValues); } - return new TopGroups(groupSort.getSort(), + return new TopGroups(groupSort.getSort(), - withinGroupSort == null ? null : withinGroupSort.getSort(), - totalHitCount, totalGroupedHitCount, groupDocsResult); + withinGroupSort == null ? null : withinGroupSort.getSort(), + totalHitCount, totalGroupedHitCount, groupDocsResult); } } @@ -161,11 +149,11 @@ // TODO: merge with SearchGroup or not? 
// ad: don't need to build a new hashmap // disad: blows up the size of SearchGroup if we need many of them, and couples implementations -class SearchGroupDocs { - public final BytesRef groupValue; +class SearchGroupDocs { + public final GROUP_VALUE_TYPE groupValue; public final TopDocsCollector collector; - public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) { + public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) { this.groupValue = groupValue; this.collector = collector; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (revision 1103024) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (revision ) @@ -23,10 +23,10 @@ /** Represents one group in the results. * * @lucene.experimental */ -public class GroupDocs { +public class GroupDocs { /** The groupField value for all docs in this group; this * may be null if hits did not have the groupField. */ - public final BytesRef groupValue; + public final GROUP_VALUE_TYPE groupValue; /** Max score in this group */ public final float maxScore; @@ -46,7 +46,7 @@ public GroupDocs(float maxScore, int totalHits, ScoreDoc[] scoreDocs, - BytesRef groupValue, + GROUP_VALUE_TYPE groupValue, Comparable[] groupSortValues) { this.maxScore = maxScore; this.totalHits = totalHits; Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision 1104421) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision ) @@ -22,7 +22,7 @@ /** Represents result returned by a grouping search. 
* * @lucene.experimental */ -public class TopGroups { +public class TopGroups { /** Number of documents matching the search */ public final int totalHitCount; @@ -33,7 +33,7 @@ public final Integer totalGroupCount; /** Group results in groupSort order */ - public final GroupDocs[] groups; + public final GroupDocs[] groups; /** How groups are sorted against each other */ public final SortField[] groupSort; @@ -41,7 +41,7 @@ /** How docs are sorted within each group */ public final SortField[] withinGroupSort; - public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { + public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { this.groupSort = groupSort; this.withinGroupSort = withinGroupSort; this.totalHitCount = totalHitCount; Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (revision 1103102) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (revision ) @@ -17,10 +17,8 @@ * limitations under the License. 
*/ -import org.apache.lucene.util.BytesRef; - /** @lucene.experimental */ -public class SearchGroup { - public BytesRef groupValue; +public class SearchGroup { + public GROUP_VALUE_TYPE groupValue; public Comparable[] sortValues; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java (revision ) @@ -0,0 +1,71 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.Collection; + +/** + * Concrete implementation of {@link SecondPassGroupingCollector} that groups based on + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * to collect grouped docs. 
+ * + * @lucene.experimental + */ +public class TermSecondPassGroupingCollector extends SecondPassGroupingCollector { + + private final SentinelIntSet ordSet; + private FieldCache.DocTermsIndex index; + private final BytesRef spareBytesRef = new BytesRef(); + + public TermSecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort, + int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) + throws IOException { + super(groupField, groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + ordSet = new SentinelIntSet(groupMap.size(), -1); +// groupDocs = new SearchGroupDocs[groupMap.size()]; + // nocommit suppress warning + groupDocs = (SearchGroupDocs[]) new SearchGroupDocs[ordSet.keys.length]; + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + + // Rebuild ordSet + ordSet.clear(); + for (SearchGroupDocs group : groupMap.values()) { + //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); + int ord = group.groupValue == null ? 
0 : index.binarySearchLookup(group.groupValue, spareBytesRef); + if (ord >= 0) { + groupDocs[ordSet.put(ord)] = group; + } + } + } + + @Override + protected int getDocSlot(int doc) { + return ordSet.find(index.getOrd(doc)); + } +} \ No newline at end of file Index: modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision 1104421) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision ) @@ -91,15 +91,15 @@ IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); w.close(); - AllGroupsCollector c1 = new AllGroupsCollector(groupField); + AllGroupsCollector c1 = new TermsAllGroupsCollector(groupField); indexSearcher.search(new TermQuery(new Term("content", "random")), c1); assertEquals(4, c1.getGroupCount()); - AllGroupsCollector c2 = new AllGroupsCollector(groupField); + AllGroupsCollector c2 = new TermsAllGroupsCollector(groupField); indexSearcher.search(new TermQuery(new Term("content", "some")), c2); assertEquals(3, c2.getGroupCount()); - AllGroupsCollector c3 = new AllGroupsCollector(groupField); + AllGroupsCollector c3 = new TermsAllGroupsCollector(groupField); indexSearcher.search(new TermQuery(new Term("content", "blob")), c3); assertEquals(2, c3.getGroupCount()); Index: modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (revision 1103150) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (revision ) @@ -17,48 +17,36 @@ * limitations under the License. 
*/ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.TreeSet; - import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRef; +import org.apache.lucene.search.*; +import java.io.IOException; +import java.util.*; + /** FirstPassGroupingCollector is the first of two passes necessary * to collect grouped hits. This pass gathers the top N sorted - * groups. + * groups. Concrete subclasses define what a group is and how it + * is internally collected. * *

See {@link org.apache.lucene.search.grouping} for more * details including a full code example.

* * @lucene.experimental */ +abstract public class FirstPassGroupingCollector extends Collector { -public class FirstPassGroupingCollector extends Collector { - - private final String groupField; + protected final String groupField; private final Sort groupSort; private final FieldComparator[] comparators; private final int[] reversed; private final int topNGroups; - private final HashMap groupMap; - private final BytesRef scratchBytesRef = new BytesRef(); + private final HashMap> groupMap; private final int compIDXEnd; // Set once we reach topNGroups unique groups: - private TreeSet orderedGroups; + private TreeSet> orderedGroups; private int docBase; private int spareSlot; - private FieldCache.DocTermsIndex index; /** * Create the first pass collector. @@ -100,7 +88,7 @@ } spareSlot = topNGroups; - groupMap = new HashMap(topNGroups); + groupMap = new HashMap>(topNGroups); } /** Returns top groups, starting from offset. This may @@ -125,12 +113,12 @@ final Collection result = new ArrayList(); int upto = 0; final int sortFieldCount = groupSort.getSort().length; - for(CollectedSearchGroup group : orderedGroups) { + for(CollectedSearchGroup group : orderedGroups) { if (upto++ < groupOffset) { continue; } //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); - SearchGroup searchGroup = new SearchGroup(); + SearchGroup searchGroup = new SearchGroup(); searchGroup.groupValue = group.groupValue; if (fillFields) { searchGroup.sortValues = new Comparable[sortFieldCount]; @@ -189,14 +177,10 @@ // TODO: should we add option to mean "ignore docs that // don't have the group field" (instead of stuffing them // under null group)? - final int ord = index.getOrd(doc); - //System.out.println(" ord=" + ord); + final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc); - final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef); - //System.out.println(" group=" + (br == null ? 
"null" : br.utf8ToString())); + final CollectedSearchGroup group = groupMap.get(groupValue); - final CollectedSearchGroup group = groupMap.get(br); - if (group == null) { // First time we are seeing this group, or, we've seen @@ -210,8 +194,8 @@ // just keep collecting them // Add a new CollectedSearchGroup: - CollectedSearchGroup sg = new CollectedSearchGroup(); - sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef); + CollectedSearchGroup sg = new CollectedSearchGroup(); + sg.groupValue = copyDocGroupValue(groupValue, null); sg.comparatorSlot = groupMap.size(); sg.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -233,20 +217,14 @@ // the bottom group with this new group. // java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast(); - final CollectedSearchGroup bottomGroup = orderedGroups.last(); + final CollectedSearchGroup bottomGroup = orderedGroups.last(); orderedGroups.remove(bottomGroup); assert orderedGroups.size() == topNGroups -1; groupMap.remove(bottomGroup.groupValue); // reuse the removed CollectedSearchGroup - if (br == null) { - bottomGroup.groupValue = null; - } else if (bottomGroup.groupValue != null) { - bottomGroup.groupValue.copy(br); - } else { - bottomGroup.groupValue = new BytesRef(br); - } + bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue); bottomGroup.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -291,7 +269,7 @@ // Remove before updating the group since lookup is done via comparators // TODO: optimize this - final CollectedSearchGroup prevLast; + final CollectedSearchGroup prevLast; if (orderedGroups != null) { prevLast = orderedGroups.last(); orderedGroups.remove(group); @@ -336,7 +314,7 @@ } }; - orderedGroups = new TreeSet(comparator); + orderedGroups = new TreeSet>(comparator); orderedGroups.addAll(groupMap.values()); assert orderedGroups.size() > 0; @@ -353,15 +331,31 @@ @Override public void setNextReader(AtomicReaderContext 
readerContext) throws IOException { docBase = readerContext.docBase; - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - for (int i=0; i extends SearchGroup { int topDoc; int comparatorSlot; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java (revision ) @@ -0,0 +1,66 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * Concrete implementation of {@link FirstPassGroupingCollector} that groups based on + * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTerms} + * to collect groups. 
+ * + * @lucene.experimental + */ +public class TermFirstPassGroupingCollector extends FirstPassGroupingCollector { + + private final BytesRef scratchBytesRef = new BytesRef(); + private FieldCache.DocTermsIndex index; + + public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException { + super(groupField, groupSort, topNGroups); + } + + @Override + protected BytesRef getDocGroupValue(int doc) { + final int ord = index.getOrd(doc); + return ord == 0 ? null : index.lookup(ord, scratchBytesRef); + } + + @Override + protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) { + if (groupValue == null) { + return null; + } else if (reuse != null) { + reuse.copy(groupValue); + return reuse; + } else { + return new BytesRef(groupValue); + } + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + } +} Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1124379) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision ) @@ -17,8 +17,6 @@ package org.apache.lucene.search.grouping; -import java.util.*; - import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -32,6 +30,8 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; +import java.util.*; + // TODO // - should test relevance sort too // - test null @@ -102,10 +102,10 @@ w.close(); final Sort groupSort = Sort.RELEVANCE; - final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10); + final FirstPassGroupingCollector c1 = new 
TermFirstPassGroupingCollector(groupField, groupSort, 10); indexSearcher.search(new TermQuery(new Term("content", "random")), c1); - final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true); + final SecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true); indexSearcher.search(new TermQuery(new Term("content", "random")), c2); final TopGroups groups = c2.getTopGroups(0); @@ -437,12 +437,12 @@ final AllGroupsCollector allGroupsCollector; if (doAllGroups) { - allGroupsCollector = new AllGroupsCollector("group"); + allGroupsCollector = new TermsAllGroupsCollector("group"); } else { allGroupsCollector = null; } - final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups); + final FirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups); final CachingCollector cCache; final Collector c; @@ -493,19 +493,19 @@ } } - final Collection topGroups = c1.getTopGroups(groupOffset, fillFields); + final Collection> topGroups = c1.getTopGroups(groupOffset, fillFields); final TopGroups groupsResult; if (topGroups != null) { if (VERBOSE) { System.out.println("TEST: topGroups"); - for (SearchGroup searchGroup : topGroups) { + for (SearchGroup searchGroup : topGroups) { System.out.println(" " + (searchGroup.groupValue == null ? 
"null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues)); } } - final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields); + final SecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields); if (doCache) { if (cCache.isCached()) { if (VERBOSE) {