Index: modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java Sun May 29 12:12:38 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java Sun May 29 13:06:19 2011 -0400 @@ -43,22 +43,20 @@ * @lucene.experimental */ -public class FirstPassGroupingCollector extends Collector { +abstract public class FirstPassGroupingCollector extends Collector { - private final String groupField; + protected final String groupField; private final Sort groupSort; private final FieldComparator[] comparators; private final int[] reversed; private final int topNGroups; - private final HashMap groupMap; - private final BytesRef scratchBytesRef = new BytesRef(); + private final HashMap> groupMap; private final int compIDXEnd; // Set once we reach topNGroups unique groups: - private TreeSet orderedGroups; + private TreeSet> orderedGroups; private int docBase; private int spareSlot; - private FieldCache.DocTermsIndex index; /** * Create the first pass collector. @@ -100,7 +98,7 @@ } spareSlot = topNGroups; - groupMap = new HashMap(topNGroups); + groupMap = new HashMap>(topNGroups); } /** Returns top groups, starting from offset. This may @@ -125,12 +123,12 @@ final Collection result = new ArrayList(); int upto = 0; final int sortFieldCount = groupSort.getSort().length; - for(CollectedSearchGroup group : orderedGroups) { + for(CollectedSearchGroup group : orderedGroups) { if (upto++ < groupOffset) { continue; } //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); - SearchGroup searchGroup = new SearchGroup(); + SearchGroup searchGroup = new SearchGroup(); searchGroup.groupValue = group.groupValue; if (fillFields) { searchGroup.sortValues = new Comparable[sortFieldCount]; @@ -189,13 +187,9 @@ // TODO: should we add option to mean "ignore docs that // don't have the group field" (instead of stuffing them // under null group)? - final int ord = index.getOrd(doc); - //System.out.println(" ord=" + ord); + final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc); - final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef); - //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString())); - - final CollectedSearchGroup group = groupMap.get(br); + final CollectedSearchGroup group = groupMap.get(groupValue); if (group == null) { @@ -210,8 +204,8 @@ // just keep collecting them // Add a new CollectedSearchGroup: - CollectedSearchGroup sg = new CollectedSearchGroup(); - sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef); + CollectedSearchGroup sg = new CollectedSearchGroup(); + sg.groupValue = copyDocGroupValue(groupValue, null); sg.comparatorSlot = groupMap.size(); sg.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -233,20 +227,14 @@ // the bottom group with this new group. // java 6-only: final CollectedSearchGroup bottomGroup = orderedGroups.pollLast(); - final CollectedSearchGroup bottomGroup = orderedGroups.last(); + final CollectedSearchGroup bottomGroup = orderedGroups.last(); orderedGroups.remove(bottomGroup); assert orderedGroups.size() == topNGroups -1; groupMap.remove(bottomGroup.groupValue); // reuse the removed CollectedSearchGroup - if (br == null) { - bottomGroup.groupValue = null; - } else if (bottomGroup.groupValue != null) { - bottomGroup.groupValue.copy(br); - } else { - bottomGroup.groupValue = new BytesRef(br); - } + bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue); bottomGroup.topDoc = docBase + doc; for (FieldComparator fc : comparators) { @@ -291,7 +279,7 @@ // Remove before updating the group since lookup is done via comparators // TODO: optimize this - final CollectedSearchGroup prevLast; + final CollectedSearchGroup prevLast; if (orderedGroups != null) { prevLast = orderedGroups.last(); orderedGroups.remove(group); @@ -336,7 +324,7 @@ } }; - orderedGroups = new TreeSet(comparator); + orderedGroups = new TreeSet>(comparator); orderedGroups.addAll(groupMap.values()); assert orderedGroups.size() > 0; @@ -353,15 +341,17 @@ @Override public void setNextReader(AtomicReaderContext readerContext) throws IOException { docBase = readerContext.docBase; - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - for (int i=0; i extends SearchGroup { int topDoc; int comparatorSlot; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sun May 29 12:12:38 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sun May 29 13:06:19 2011 -0400 @@ -23,10 +23,10 @@ /** Represents one group in the results. * * @lucene.experimental */ -public class GroupDocs { +public class GroupDocs { /** The groupField value for all docs in this group; this * may be null if hits did not have the groupField. */ - public final BytesRef groupValue; + public final GROUP_VALUE_TYPE groupValue; /** Max score in this group */ public final float maxScore; @@ -46,7 +46,7 @@ public GroupDocs(float maxScore, int totalHits, ScoreDoc[] scoreDocs, - BytesRef groupValue, + GROUP_VALUE_TYPE groupValue, Comparable[] groupSortValues) { this.maxScore = maxScore; this.totalHits = totalHits; Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java Sun May 29 12:12:38 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java Sun May 29 13:06:19 2011 -0400 @@ -20,7 +20,7 @@ import org.apache.lucene.util.BytesRef; /** @lucene.experimental */ -public class SearchGroup { - public BytesRef groupValue; +public class SearchGroup { + public GROUP_VALUE_TYPE groupValue; public Comparable[] sortValues; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java Sun May 29 12:12:38 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java Sun May 29 13:06:19 2011 -0400 @@ -43,23 +43,21 @@ * * @lucene.experimental */ -public class SecondPassGroupingCollector extends Collector { - private final HashMap groupMap; +public abstract class SecondPassGroupingCollector extends Collector { + protected final HashMap> groupMap; - private FieldCache.DocTermsIndex index; - private final String groupField; + protected final String groupField; private final int maxDocsPerGroup; - private final SentinelIntSet ordSet; - private final SearchGroupDocs[] groupDocs; - private final BytesRef spareBytesRef = new BytesRef(); - private final Collection groups; + // nocommit can we somehow keep this private? + protected final SearchGroupDocs[] groupDocs; + private final Collection> groups; private final Sort withinGroupSort; private final Sort groupSort; private int totalHitCount; private int totalGroupedHitCount; - public SecondPassGroupingCollector(String groupField, Collection groups, Sort groupSort, Sort withinGroupSort, + public SecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { @@ -74,9 +72,9 @@ this.groupField = groupField; this.maxDocsPerGroup = maxDocsPerGroup; - groupMap = new HashMap(groups.size()); + groupMap = new HashMap>(groups.size()); - for (SearchGroup group : groups) { + for (SearchGroup group : groups) { //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); final TopDocsCollector collector; if (withinGroupSort == null) { @@ -87,25 +85,25 @@ collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); } groupMap.put(group.groupValue, - new SearchGroupDocs(group.groupValue, - collector)); + new SearchGroupDocs(group.groupValue, + collector)); } - ordSet = new SentinelIntSet(groupMap.size(), -1); - groupDocs = new SearchGroupDocs[ordSet.keys.length]; + //groupDocs = new SearchGroupDocs[groupMap.size()]; + // nocommit suppress warning + groupDocs = (SearchGroupDocs[]) new SearchGroupDocs[groupMap.size()]; } @Override public void setScorer(Scorer scorer) throws IOException { - for (SearchGroupDocs group : groupMap.values()) { + for (SearchGroupDocs group : groupMap.values()) { group.collector.setScorer(scorer); } } @Override public void collect(int doc) throws IOException { - final int slot = ordSet.find(index.getOrd(doc)); - //System.out.println("SP.collect doc=" + doc + " slot=" + slot); + final int slot = getDocSlot(doc); totalHitCount++; if (slot >= 0) { totalGroupedHitCount++; @@ -113,23 +111,14 @@ } } + protected abstract int getDocSlot(int doc) throws IOException; + @Override public void setNextReader(AtomicReaderContext readerContext) throws IOException { //System.out.println("SP.setNextReader"); - for (SearchGroupDocs group : groupMap.values()) { + for (SearchGroupDocs group : groupMap.values()) { group.collector.setNextReader(readerContext); } - index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); - - // Rebuild ordSet - ordSet.clear(); - for (SearchGroupDocs group : groupMap.values()) { - //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); - int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef); - if (ord >= 0) { - groupDocs[ordSet.put(ord)] = group; - } - } } @Override @@ -138,22 +127,23 @@ } public TopGroups getTopGroups(int withinGroupOffset) { - final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()]; + // nocommit suppress + final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()]; int groupIDX = 0; for(SearchGroup group : groups) { - final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); + final SearchGroupDocs groupDocs = groupMap.get(group.groupValue); final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup); - groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), - topDocs.totalHits, - topDocs.scoreDocs, - groupDocs.groupValue, - group.sortValues); + groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), + topDocs.totalHits, + topDocs.scoreDocs, + groupDocs.groupValue, + group.sortValues); } - return new TopGroups(groupSort.getSort(), - withinGroupSort == null ? null : withinGroupSort.getSort(), - totalHitCount, totalGroupedHitCount, groupDocsResult); + return new TopGroups(groupSort.getSort(), + withinGroupSort == null ? null : withinGroupSort.getSort(), + totalHitCount, totalGroupedHitCount, groupDocsResult); } } @@ -161,11 +151,11 @@ // TODO: merge with SearchGroup or not? // ad: don't need to build a new hashmap // disad: blows up the size of SearchGroup if we need many of them, and couples implementations -class SearchGroupDocs { - public final BytesRef groupValue; +class SearchGroupDocs { + public final GROUP_VALUE_TYPE groupValue; public final TopDocsCollector collector; - public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) { + public SearchGroupDocs(GROUP_VALUE_TYPE groupValue, TopDocsCollector collector) { this.groupValue = groupValue; this.collector = collector; } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermFirstPassGroupingCollector.java Sun May 29 13:06:19 2011 -0400 @@ -0,0 +1,59 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +public class TermFirstPassGroupingCollector extends FirstPassGroupingCollector { + + private final BytesRef scratchBytesRef = new BytesRef(); + private FieldCache.DocTermsIndex index; + + public TermFirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException { + super(groupField, groupSort, topNGroups); + } + + @Override + protected BytesRef getDocGroupValue(int doc) { + final int ord = index.getOrd(doc); + return ord == 0 ? null : index.lookup(ord, scratchBytesRef); + } + + @Override + protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) { + if (groupValue == null) { + return null; + } else if (reuse != null) { + reuse.copy(groupValue); + return reuse; + } else { + return new BytesRef(groupValue); + } + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + } +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TermSecondPassGroupingCollector.java Sun May 29 13:06:19 2011 -0400 @@ -0,0 +1,60 @@ +package org.apache.lucene.search.grouping; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.Sort; +import org.apache.lucene.util.BytesRef; + +public class TermSecondPassGroupingCollector extends SecondPassGroupingCollector { + private final SentinelIntSet ordSet; + private FieldCache.DocTermsIndex index; + private final BytesRef spareBytesRef = new BytesRef(); + + public TermSecondPassGroupingCollector(String groupField, Collection> groups, Sort groupSort, Sort withinGroupSort, + int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) + throws IOException { + super(groupField, groups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + ordSet = new SentinelIntSet(groupMap.size(), -1); + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField); + + // Rebuild ordSet + ordSet.clear(); + for (SearchGroupDocs group : groupMap.values()) { + //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); + int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef); + if (ord >= 0) { + groupDocs[ordSet.put(ord)] = group; + } + } + } + + @Override + protected int getDocSlot(int doc) { + return ordSet.find(index.getOrd(doc)); + } +} \ No newline at end of file Index: modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java --- modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java Sun May 29 12:12:38 2011 -0400 +++ modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java Sun May 29 13:06:19 2011 -0400 @@ -22,7 +22,7 @@ /** Represents result returned by a grouping search. * * @lucene.experimental */ -public class TopGroups { +public class TopGroups { /** Number of documents matching the search */ public final int totalHitCount; @@ -33,7 +33,7 @@ public final Integer totalGroupCount; /** Group results in groupSort order */ - public final GroupDocs[] groups; + public final GroupDocs[] groups; /** How groups are sorted against each other */ public final SortField[] groupSort; @@ -41,7 +41,7 @@ /** How docs are sorted within each group */ public final SortField[] withinGroupSort; - public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { + public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) { this.groupSort = groupSort; this.withinGroupSort = withinGroupSort; this.totalHitCount = totalHitCount;