Index: modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision 1126761)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (revision )
@@ -17,9 +17,7 @@
* limitations under the License.
*/
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
@@ -35,65 +33,36 @@
* the most relevant document of a group.
*
*
- * Implementation detail: an int hash set (SentinelIntSet)
- * is used to detect if a group is already added to the
- * total count. For each segment the int set is cleared and filled
- * with previous counted groups that occur in the new
- * segment.
+ * This is an abstract version. Concrete implementations define
+ * what a group actually is and how it is internally collected.
*
* @lucene.experimental
*/
-public class AllGroupsCollector extends Collector {
+public abstract class AllGroupsCollector extends Collector {
- private static final int DEFAULT_INITIAL_SIZE = 128;
+ final List groups;
- private final String groupField;
- private final SentinelIntSet ordSet;
- private final List groups;
- private final BytesRef spareBytesRef = new BytesRef();
-
- private FieldCache.DocTermsIndex index;
-
- /**
- * Expert: Constructs a {@link AllGroupsCollector}
- *
- * @param groupField The field to group by
- * @param initialSize The initial allocation size of the
- * internal int set and group list
- * which should roughly match the total
- * number of expected unique groups. Be aware that the
- * heap usage is 4 bytes * initialSize.
- */
- public AllGroupsCollector(String groupField, int initialSize) {
- this.groupField = groupField;
- ordSet = new SentinelIntSet(initialSize, -1);
- groups = new ArrayList(initialSize);
+ public AllGroupsCollector(int initialSize) {
+ groups = new ArrayList(initialSize);
}
- /**
- * Constructs a {@link AllGroupsCollector}. This sets the
- * initial allocation size for the internal int set and group
- * list to 128.
- *
- * @param groupField The field to group by
- */
- public AllGroupsCollector(String groupField) {
- this(groupField, DEFAULT_INITIAL_SIZE);
- }
-
public void setScorer(Scorer scorer) throws IOException {
}
public void collect(int doc) throws IOException {
- int key = index.getOrd(doc);
- if (!ordSet.exists(key)) {
- ordSet.put(key);
- BytesRef term = key == 0 ? null : index.lookup(key, new BytesRef());
- groups.add(term);
+ addGroupIfNotExists(doc);
- }
+ }
- }
/**
+ * Adds a group if not already added.
+ *
+ * @param doc The doc that is used to map to a group
+ */
+ //NOTE: We could have two separate methods: existGroup() and addGroup()
+ // I think for now this works best for the one impl we have.
+ public abstract void addGroupIfNotExists(int doc);
+
+ /**
* Returns the total number of groups for the executed search.
* This is a convenience method. The following code snippet has the same effect: getGroups().size()
*
@@ -111,23 +80,10 @@
*
* @return the group values
*/
- public Collection getGroups() {
+ public Collection getGroups() {
return groups;
}
- public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
- index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField);
-
- // Clear ordSet and fill it with previous encountered groups that can occur in the current segment.
- ordSet.clear();
- for (BytesRef countedGroup : groups) {
- int ord = index.binarySearchLookup(countedGroup, spareBytesRef);
- if (ord >= 0) {
- ordSet.put(ord);
- }
- }
- }
-
public boolean acceptsDocsOutOfOrder() {
return true;
}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermsAllGroupsCollector.java (revision )
@@ -0,0 +1,103 @@
+package org.apache.lucene.search.grouping;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * A collector that collects all groups that match the
+ * query. Only the group value is collected, and the order
+ * is undefined. This collector does not determine
+ * the most relevant document of a group.
+ *
+ *
+ * Implementation detail: an int hash set (SentinelIntSet)
+ * is used to detect if a group is already added to the
+ * total count. For each segment the int set is cleared and filled
+ * with previously counted groups that occur in the new
+ * segment.
+ *
+ * @lucene.experimental
+ */
+public class TermsAllGroupsCollector extends AllGroupsCollector {
+
+ private static final int DEFAULT_INITIAL_SIZE = 128;
+
+ private final String groupField;
+ private final SentinelIntSet ordSet;
+
+ private FieldCache.DocTermsIndex index;
+ private final BytesRef spareBytesRef = new BytesRef();
+
+ /**
+ * Expert: Constructs a {@link TermsAllGroupsCollector}
+ *
+ * @param groupField The field to group by
+ * @param initialSize The initial allocation size of the
+ * internal int set and group list
+ * which should roughly match the total
+ * number of expected unique groups. Be aware that the
+ * heap usage is 4 bytes * initialSize.
+ */
+ public TermsAllGroupsCollector(String groupField, int initialSize) {
+ super(initialSize);
+ ordSet = new SentinelIntSet(initialSize, -1);
+ this.groupField = groupField;
+ }
+
+ /**
+ * Constructs a {@link TermsAllGroupsCollector}. This sets the
+ * initial allocation size for the internal int set and group
+ * list to 128.
+ *
+ * @param groupField The field to group by
+ */
+ public TermsAllGroupsCollector(String groupField) {
+ this(groupField, DEFAULT_INITIAL_SIZE);
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ public void addGroupIfNotExists(int doc) {
+ int key = index.getOrd(doc);
+ if (!ordSet.exists(key)) {
+ ordSet.put(key);
+ BytesRef term = key == 0 ? null : index.lookup(key, new BytesRef());
+ groups.add(term);
+ }
+ }
+
+ public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+ index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupField);
+
+ // Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
+ ordSet.clear();
+ for (BytesRef countedGroup : groups) {
+ int ord = index.binarySearchLookup(countedGroup, spareBytesRef);
+ if (ord >= 0) {
+ ordSet.put(ord);
+ }
+ }
+ }
+
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (revision 1103150)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (revision )
@@ -17,49 +17,41 @@
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.*;
+
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.TopDocsCollector;
-import org.apache.lucene.search.TopFieldCollector;
-import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.util.BytesRef;
-
/**
* SecondPassGroupingCollector is the second of two passes
* necessary to collect grouped docs. This pass gathers the
* top N documents per top group computed from the
- * first pass.
+ * first pass. Concrete subclasses define what a group is and how it
+ * is internally collected.
*
* See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.
*
* @lucene.experimental
*/
-public class SecondPassGroupingCollector extends Collector {
- private final HashMap groupMap;
+public abstract class SecondPassGroupingCollector extends Collector {
+ protected final HashMap> groupMap;
- private FieldCache.DocTermsIndex index;
- private final String groupField;
+ protected final String groupField;
private final int maxDocsPerGroup;
- private final SentinelIntSet ordSet;
- private final SearchGroupDocs[] groupDocs;
- private final BytesRef spareBytesRef = new BytesRef();
- private final Collection groups;
+ // nocommit can we somehow keep this private?
+ // Note (Martijn): I think we need this protected, since subclasses need to access it.
+ protected SearchGroupDocs[] groupDocs;
+ private final Collection> groups;
private final Sort withinGroupSort;
private final Sort groupSort;
private int totalHitCount;
private int totalGroupedHitCount;
- public SecondPassGroupingCollector(String groupField, Collection groups, Sort groupSort, Sort withinGroupSort,
+ public SecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort,
int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
throws IOException {
@@ -74,9 +66,9 @@
this.groupField = groupField;
this.maxDocsPerGroup = maxDocsPerGroup;
- groupMap = new HashMap(groups.size());
+ groupMap = new HashMap>(groups.size());
- for (SearchGroup group : groups) {
+ for (SearchGroup group : groups) {
//System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
final TopDocsCollector collector;
if (withinGroupSort == null) {
@@ -87,25 +79,21 @@
collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
}
groupMap.put(group.groupValue,
- new SearchGroupDocs(group.groupValue,
+ new SearchGroupDocs(group.groupValue,
- collector));
+ collector));
}
-
- ordSet = new SentinelIntSet(groupMap.size(), -1);
- groupDocs = new SearchGroupDocs[ordSet.keys.length];
}
@Override
public void setScorer(Scorer scorer) throws IOException {
- for (SearchGroupDocs group : groupMap.values()) {
+ for (SearchGroupDocs group : groupMap.values()) {
group.collector.setScorer(scorer);
}
}
@Override
public void collect(int doc) throws IOException {
- final int slot = ordSet.find(index.getOrd(doc));
- //System.out.println("SP.collect doc=" + doc + " slot=" + slot);
+ final int slot = getDocSlot(doc);
totalHitCount++;
if (slot >= 0) {
totalGroupedHitCount++;
@@ -113,24 +101,23 @@
}
}
+ /**
+ * Returns the slot the specified doc belongs to.
+ *
+ * @param doc The specified doc
+ * @return the slot the specified doc belongs to.
+ * @throws IOException If an I/O related error occurred
+ */
+ //NOTE: I wonder how this pans out for fq. Fq don't have ords, so we need to allocate a larger groupDocs array.
+ protected abstract int getDocSlot(int doc) throws IOException;
+
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
//System.out.println("SP.setNextReader");
- for (SearchGroupDocs group : groupMap.values()) {
+ for (SearchGroupDocs group : groupMap.values()) {
group.collector.setNextReader(readerContext);
}
- index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
-
- // Rebuild ordSet
- ordSet.clear();
- for (SearchGroupDocs group : groupMap.values()) {
- //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
- int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
- if (ord >= 0) {
- groupDocs[ordSet.put(ord)] = group;
- }
+ }
- }
- }
@Override
public boolean acceptsDocsOutOfOrder() {
@@ -138,22 +125,23 @@
}
public TopGroups getTopGroups(int withinGroupOffset) {
- final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];
+ // nocommit suppress
+ final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()];
int groupIDX = 0;
for(SearchGroup group : groups) {
- final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
+ final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
- groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
+ groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
- topDocs.totalHits,
- topDocs.scoreDocs,
- groupDocs.groupValue,
- group.sortValues);
+ topDocs.totalHits,
+ topDocs.scoreDocs,
+ groupDocs.groupValue,
+ group.sortValues);
}
- return new TopGroups(groupSort.getSort(),
+ return new TopGroups(groupSort.getSort(),
- withinGroupSort == null ? null : withinGroupSort.getSort(),
- totalHitCount, totalGroupedHitCount, groupDocsResult);
+ withinGroupSort == null ? null : withinGroupSort.getSort(),
+ totalHitCount, totalGroupedHitCount, groupDocsResult);
}
}
@@ -161,11 +149,11 @@
// TODO: merge with SearchGroup or not?
// ad: don't need to build a new hashmap
// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
-class SearchGroupDocs {
- public final BytesRef groupValue;
+class SearchGroupDocs {
+ public final GROUP_VALUE_TYPE groupValue;
public final TopDocsCollector collector;
- public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) {
+ public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) {
this.groupValue = groupValue;
this.collector = collector;
}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (revision 1103024)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (revision )
@@ -23,10 +23,10 @@
/** Represents one group in the results.
*
* @lucene.experimental */
-public class GroupDocs {
+public class GroupDocs {
/** The groupField value for all docs in this group; this
* may be null if hits did not have the groupField. */
- public final BytesRef groupValue;
+ public final GROUP_VALUE_TYPE groupValue;
/** Max score in this group */
public final float maxScore;
@@ -46,7 +46,7 @@
public GroupDocs(float maxScore,
int totalHits,
ScoreDoc[] scoreDocs,
- BytesRef groupValue,
+ GROUP_VALUE_TYPE groupValue,
Comparable[] groupSortValues) {
this.maxScore = maxScore;
this.totalHits = totalHits;
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision 1104421)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (revision )
@@ -22,7 +22,7 @@
/** Represents result returned by a grouping search.
*
* @lucene.experimental */
-public class TopGroups {
+public class TopGroups {
/** Number of documents matching the search */
public final int totalHitCount;
@@ -33,7 +33,7 @@
public final Integer totalGroupCount;
/** Group results in groupSort order */
- public final GroupDocs[] groups;
+ public final GroupDocs[] groups;
/** How groups are sorted against each other */
public final SortField[] groupSort;
@@ -41,7 +41,7 @@
/** How docs are sorted within each group */
public final SortField[] withinGroupSort;
- public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) {
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) {
this.groupSort = groupSort;
this.withinGroupSort = withinGroupSort;
this.totalHitCount = totalHitCount;
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (revision 1103102)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (revision )
@@ -17,10 +17,8 @@
* limitations under the License.
*/
-import org.apache.lucene.util.BytesRef;
-
/** @lucene.experimental */
-public class SearchGroup {
- public BytesRef groupValue;
+public class SearchGroup {
+ public GROUP_VALUE_TYPE groupValue;
public Comparable[] sortValues;
}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java (revision )
@@ -0,0 +1,71 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * Concrete implementation of {@link SecondPassGroupingCollector} that groups based on
+ * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTermsIndex}
+ * to collect grouped docs.
+ *
+ * @lucene.experimental
+ */
+public class TermSecondPassGroupingCollector extends SecondPassGroupingCollector {
+
+ private final SentinelIntSet ordSet;
+ private FieldCache.DocTermsIndex index;
+ private final BytesRef spareBytesRef = new BytesRef();
+
+ public TermSecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort,
+ int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
+ throws IOException {
+ super(groupField, groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ ordSet = new SentinelIntSet(groupMap.size(), -1);
+// groupDocs = new SearchGroupDocs[groupMap.size()];
+ // nocommit suppress warning
+ groupDocs = (SearchGroupDocs[]) new SearchGroupDocs[ordSet.keys.length];
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ super.setNextReader(readerContext);
+ index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
+
+ // Rebuild ordSet
+ ordSet.clear();
+ for (SearchGroupDocs group : groupMap.values()) {
+ //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
+ if (ord >= 0) {
+ groupDocs[ordSet.put(ord)] = group;
+ }
+ }
+ }
+
+ @Override
+ protected int getDocSlot(int doc) {
+ return ordSet.find(index.getOrd(doc));
+ }
+}
\ No newline at end of file
Index: modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
===================================================================
--- modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision 1104421)
+++ modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision )
@@ -91,15 +91,15 @@
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
- AllGroupsCollector c1 = new AllGroupsCollector(groupField);
+ AllGroupsCollector c1 = new TermsAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
assertEquals(4, c1.getGroupCount());
- AllGroupsCollector c2 = new AllGroupsCollector(groupField);
+ AllGroupsCollector c2 = new TermsAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
assertEquals(3, c2.getGroupCount());
- AllGroupsCollector c3 = new AllGroupsCollector(groupField);
+ AllGroupsCollector c3 = new TermsAllGroupsCollector(groupField);
indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
assertEquals(2, c3.getGroupCount());
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (revision 1103150)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (revision )
@@ -17,48 +17,36 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.TreeSet;
-
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.search.FieldComparator;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.search.*;
+import java.io.IOException;
+import java.util.*;
+
/** FirstPassGroupingCollector is the first of two passes necessary
* to collect grouped hits. This pass gathers the top N sorted
- * groups.
+ * groups. Concrete subclasses define what a group is and how it
+ * is internally collected.
*
* See {@link org.apache.lucene.search.grouping} for more
* details including a full code example.
*
* @lucene.experimental
*/
+abstract public class FirstPassGroupingCollector extends Collector {
-public class FirstPassGroupingCollector extends Collector {
-
- private final String groupField;
+ protected final String groupField;
private final Sort groupSort;
private final FieldComparator[] comparators;
private final int[] reversed;
private final int topNGroups;
- private final HashMap groupMap;
- private final BytesRef scratchBytesRef = new BytesRef();
+ private final HashMap> groupMap;
private final int compIDXEnd;
// Set once we reach topNGroups unique groups:
- private TreeSet orderedGroups;
+ private TreeSet> orderedGroups;
private int docBase;
private int spareSlot;
- private FieldCache.DocTermsIndex index;
/**
* Create the first pass collector.
@@ -100,7 +88,7 @@
}
spareSlot = topNGroups;
- groupMap = new HashMap(topNGroups);
+ groupMap = new HashMap>(topNGroups);
}
/** Returns top groups, starting from offset. This may
@@ -125,12 +113,12 @@
final Collection result = new ArrayList();
int upto = 0;
final int sortFieldCount = groupSort.getSort().length;
- for(CollectedSearchGroup group : orderedGroups) {
+ for(CollectedSearchGroup group : orderedGroups) {
if (upto++ < groupOffset) {
continue;
}
//System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
- SearchGroup searchGroup = new SearchGroup();
+ SearchGroup searchGroup = new SearchGroup();
searchGroup.groupValue = group.groupValue;
if (fillFields) {
searchGroup.sortValues = new Comparable[sortFieldCount];
@@ -189,14 +177,10 @@
// TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them
// under null group)?
- final int ord = index.getOrd(doc);
- //System.out.println(" ord=" + ord);
+ final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
- final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef);
- //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
+ final CollectedSearchGroup group = groupMap.get(groupValue);
- final CollectedSearchGroup group = groupMap.get(br);
-
if (group == null) {
// First time we are seeing this group, or, we've seen
@@ -210,8 +194,8 @@
// just keep collecting them
// Add a new CollectedSearchGroup:
- CollectedSearchGroup sg = new CollectedSearchGroup();
- sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef);
+ CollectedSearchGroup sg = new CollectedSearchGroup();
+ sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -233,20 +217,14 @@
// the bottom group with this new group.
// java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
- final CollectedSearchGroup bottomGroup = orderedGroups.last();
+ final CollectedSearchGroup bottomGroup = orderedGroups.last();
orderedGroups.remove(bottomGroup);
assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup
- if (br == null) {
- bottomGroup.groupValue = null;
- } else if (bottomGroup.groupValue != null) {
- bottomGroup.groupValue.copy(br);
- } else {
- bottomGroup.groupValue = new BytesRef(br);
- }
+ bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.topDoc = docBase + doc;
for (FieldComparator fc : comparators) {
@@ -291,7 +269,7 @@
// Remove before updating the group since lookup is done via comparators
// TODO: optimize this
- final CollectedSearchGroup prevLast;
+ final CollectedSearchGroup prevLast;
if (orderedGroups != null) {
prevLast = orderedGroups.last();
orderedGroups.remove(group);
@@ -336,7 +314,7 @@
}
};
- orderedGroups = new TreeSet(comparator);
+ orderedGroups = new TreeSet>(comparator);
orderedGroups.addAll(groupMap.values());
assert orderedGroups.size() > 0;
@@ -353,15 +331,31 @@
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase;
- index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
-
for (int i=0; i extends SearchGroup {
int topDoc;
int comparatorSlot;
}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java (revision )
@@ -0,0 +1,66 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Concrete implementation of {@link FirstPassGroupingCollector} that groups based on
+ * field values and more specifically uses {@link org.apache.lucene.search.FieldCache.DocTermsIndex}
+ * to collect groups.
+ *
+ * @lucene.experimental
+ */
+public class TermFirstPassGroupingCollector extends FirstPassGroupingCollector {
+
+ private final BytesRef scratchBytesRef = new BytesRef();
+ private FieldCache.DocTermsIndex index;
+
+ public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
+ super(groupField, groupSort, topNGroups);
+ }
+
+ @Override
+ protected BytesRef getDocGroupValue(int doc) {
+ final int ord = index.getOrd(doc);
+ return ord == 0 ? null : index.lookup(ord, scratchBytesRef);
+ }
+
+ @Override
+ protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
+ if (groupValue == null) {
+ return null;
+ } else if (reuse != null) {
+ reuse.copy(groupValue);
+ return reuse;
+ } else {
+ return new BytesRef(groupValue);
+ }
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ super.setNextReader(readerContext);
+ index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
+ }
+}
Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
===================================================================
--- modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1124379)
+++ modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision )
@@ -17,8 +17,6 @@
package org.apache.lucene.search.grouping;
-import java.util.*;
-
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -32,6 +30,8 @@
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
+import java.util.*;
+
// TODO
// - should test relevance sort too
// - test null
@@ -102,10 +102,10 @@
w.close();
final Sort groupSort = Sort.RELEVANCE;
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
+ final FirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
+ final SecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
final TopGroups groups = c2.getTopGroups(0);
@@ -437,12 +437,12 @@
final AllGroupsCollector allGroupsCollector;
if (doAllGroups) {
- allGroupsCollector = new AllGroupsCollector("group");
+ allGroupsCollector = new TermsAllGroupsCollector("group");
} else {
allGroupsCollector = null;
}
- final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
+ final FirstPassGroupingCollector c1 = new TermFirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
@@ -493,19 +493,19 @@
}
}
- final Collection topGroups = c1.getTopGroups(groupOffset, fillFields);
+ final Collection> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
if (topGroups != null) {
if (VERBOSE) {
System.out.println("TEST: topGroups");
- for (SearchGroup searchGroup : topGroups) {
+ for (SearchGroup searchGroup : topGroups) {
System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
}
}
- final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ final SecondPassGroupingCollector c2 = new TermSecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {