Index: dev-tools/idea/modules/grouping/grouping.iml =================================================================== --- dev-tools/idea/modules/grouping/grouping.iml (revision 1178182) +++ dev-tools/idea/modules/grouping/grouping.iml (revision ) @@ -12,5 +12,6 @@ + Index: modules/build.xml =================================================================== --- modules/build.xml (revision 1178182) +++ modules/build.xml (revision ) @@ -25,9 +25,9 @@ + - @@ -40,9 +40,9 @@ + - @@ -55,9 +55,9 @@ + - @@ -70,9 +70,9 @@ + - @@ -86,9 +86,9 @@ + - @@ -100,9 +100,9 @@ + - @@ -116,9 +116,9 @@ + - Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java (revision ) +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java (revision ) @@ -0,0 +1,58 @@ +package org.apache.lucene.queries.function.valuesource; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.docvalues.StringIndexDocValues; +import org.apache.lucene.queries.function.ValueSource; //javadoc + +import java.io.IOException; +import java.util.Map; + +/** + * A {@link FieldCacheSource} for a String based field (presumably usable as a {@link ValueSource}; confirm against FieldCacheSource). + * Its {@link #getValues} returns a {@link StringIndexDocValues} whose object value and string form are both built from strVal(doc). + */ +public class BytesRefFieldSource extends FieldCacheSource { + + /** Creates a source for the given field name, which is handed to the superclass. */ public BytesRefFieldSource(String field) { + super(field); + } + + @Override + public DocValues getValues(Map context, IndexReader.AtomicReaderContext readerContext) throws IOException { + return new StringIndexDocValues(this, readerContext, field) { + + /* The readable value is returned unchanged as the term. */ @Override + protected String toTerm(String readableValue) { + return readableValue; + } + + @Override + public Object objectVal(int doc) { + return strVal(doc); + } + + /* Renders as description(), an equals sign, then the string value of the document. */ @Override + public String toString(int doc) { + return description() + '=' + strVal(doc); + } + + }; + } +} Index: modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java (revision 1178182) +++ modules/queries/src/java/org/apache/lucene/queries/function/docvalues/StringIndexDocValues.java (revision ) @@ -135,6 +135,22 @@ } @Override + public int ordVal(int doc) { + return termsIndex.getOrd(doc); + } + + @Override + public int numOrd() { + return termsIndex.numOrd(); + } + + /** Looks up the given value in termsIndex and returns the lookup result. The value is cast to MutableValueStr, so any other MutableValue type fails with a ClassCastException. NOTE(review): presumably a negative result means the term does not occur in this segment; confirm against binarySearchLookup. */ @Override + public int ord(MutableValue value) { + BytesRef rawValue = ((MutableValueStr) value).value; + return termsIndex.binarySearchLookup(rawValue, spare); + } + + @Override public ValueFiller getValueFiller() { return new ValueFiller() { private final MutableValueStr mval = new MutableValueStr(); Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java 
=================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (revision 1178182) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (revision ) @@ -20,7 +20,7 @@ import java.util.Arrays; /** A native int set where one value is reserved to mean "EMPTY" */ -class SentinelIntSet { +public class SentinelIntSet { public int[] keys; public int count; public final int emptyVal; @@ -99,6 +99,14 @@ return s; } + /** Removes key when find(key) reports a non negative slot: count is decremented and the slot is reset to emptyVal. Nothing happens when the key is not found. NOTE(review): if find resolves collisions by probing past occupied slots, blanking a slot here could hide keys that were stored further along the same probe sequence; find is outside this hunk, so confirm before relying on remove. */ public void remove(int key) { + int s = find(key); + if (s >= 0) { + count--; + keys[s] = emptyVal; + } + } + public void rehash() { int newSize = keys.length << 1; int[] oldKeys = keys; Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java (revision ) @@ -0,0 +1,366 @@ +package org.apache.lucene.search.grouping.research; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.*; +import org.apache.lucene.search.grouping.groupholder.GroupHolder; +import org.apache.lucene.util.mutable.MutableValue; + +import java.io.IOException; +import java.util.*; + +/** FirstPassGroupingCollector is the first of two passes necessary + * to collect grouped hits. This pass gathers the top N sorted + * groups. + * + *

See {@link org.apache.lucene.search.grouping} for more + * details including a full code example.

+ * + * @lucene.experimental + */ + +public class ResearchFirstPassGroupingCollector extends Collector { + + private final Sort groupSort; + private final FieldComparator[] comparators; + private final int[] reversed; + private final int topNGroups; +// private final HashMap groupMap; + private final GroupHolder groupHolder; + private final int compIDXEnd; + private final ValueSource valueSource; + private final Map vsContext; + + // Set once we reach topNGroups unique groups: + private TreeSet orderedGroups; + private int docBase; + private int spareSlot; + + private DocValues.ValueFiller filler; + private MutableValue mval; + + public ResearchFirstPassGroupingCollector(ValueSource valueSource, GroupHolder groupHolder, Sort groupSort, int topNGroups) throws IOException { + this(valueSource, groupHolder, new HashMap(), groupSort, topNGroups); + } + + @SuppressWarnings("unchecked") + public ResearchFirstPassGroupingCollector(ValueSource valueSource, GroupHolder groupHolder, Map vsContext, Sort groupSort, int topNGroups) throws IOException { + if (topNGroups < 1) { + throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")"); + } + + this.valueSource = valueSource; + this.vsContext = vsContext; + // TODO: allow null groupSort to mean "by relevance", + // and specialize it? + this.groupSort = groupSort; + + this.topNGroups = topNGroups; + + final SortField[] sortFields = groupSort.getSort(); + comparators = new FieldComparator[sortFields.length]; + compIDXEnd = comparators.length - 1; + reversed = new int[sortFields.length]; + for (int i = 0; i < sortFields.length; i++) { + final SortField sortField = sortFields[i]; + + // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot): + comparators[i] = sortField.getComparator(topNGroups + 1, i); + reversed[i] = sortField.getReverse() ? 
-1 : 1; + } + + spareSlot = topNGroups; + this.groupHolder = groupHolder; +// groupMap = new HashMap(topNGroups); + } + + /** Returns top groups, starting from offset. This may + * return null, if no groups were collected, or if the + * number of unique groups collected is <= offset. */ + public Collection getTopGroups(int groupOffset, boolean fillFields) { + + //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size()); + + if (groupOffset < 0) { + throw new IllegalArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")"); + } + +// if (groupMap.size() <= groupOffset) { + if (groupHolder.getHeldGroupsSize() <= groupOffset) { + return null; + } + + if (orderedGroups == null) { + buildSortedSet(); + } + + final Collection result = new ArrayList(); + int upto = 0; + final int sortFieldCount = groupSort.getSort().length; + for(CollectedSearchGroup group : orderedGroups) { + if (upto++ < groupOffset) { + continue; + } + //System.out.println(" group=" + (group.groupValue == null ? 
"null" : group.groupValue.utf8ToString())); + ResearchSearchGroup searchGroup = new ResearchSearchGroup(); + searchGroup.groupValue = group.groupValue; + if (fillFields) { + searchGroup.sortValues = new Comparable[sortFieldCount]; + for(int sortFieldIDX=0;sortFieldIDX 0) { + // Definitely competitive; set remaining comparators: + for (int compIDX2=compIDX+1; compIDX2 comparator = new Comparator() { + public int compare(CollectedSearchGroup o1, CollectedSearchGroup o2) { + for (int compIDX = 0;; compIDX++) { + FieldComparator fc = comparators[compIDX]; + final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot); + if (c != 0) { + return c; + } else if (compIDX == compIDXEnd) { + return o1.topDoc - o2.topDoc; + } + } + } + }; + + orderedGroups = new TreeSet(comparator); +// orderedGroups.addAll(groupMap.values()); + orderedGroups.addAll((Collection) groupHolder.getHeldGroups()); + assert orderedGroups.size() > 0; + + for (FieldComparator fc : comparators) { + fc.setBottom(orderedGroups.last().comparatorSlot); + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + docBase = readerContext.docBase; + DocValues groupValueSource = valueSource.getValues(vsContext, readerContext); + filler = groupValueSource.getValueFiller(); + mval = filler.getValue(); + groupHolder.prepareForNextSegment(readerContext); + + for (int i=0; i values; + private final ValueSource valueSource; + private final Map vsContext; + + DocValues docValues; + + // Builder??? 
+ /* Convenience overloads: each delegates to the five argument create, defaulting the context to a new HashMap, the size to DEFAULT_INITIAL_SIZE and fixed to false. */ public static GroupHolder create(ValueSource valueSource ) { + return create(valueSource, new HashMap(), DEFAULT_INITIAL_SIZE); + } + + public static GroupHolder create(ValueSource valueSource , Map context) { + return create(valueSource, context, DEFAULT_INITIAL_SIZE); + } + + public static GroupHolder create(ValueSource valueSource , int initialSize) { + return create(valueSource, new HashMap(), initialSize); + } + + public static GroupHolder create(ValueSource valueSource, Map context, int initialSize) { + return create(valueSource, context, initialSize, false); + } + + /** Returns a Fixed holder when fixed is true, otherwise a Variable holder. */ public static GroupHolder create(ValueSource valueSource, Map context, int initialSize, boolean fixed) { + if (fixed) { + return new Fixed(valueSource, context, initialSize); + } else { + return new Variable(valueSource, context, initialSize); + } + } + + TermGroupHolder(ValueSource valueSource, Map context, int initialSize) { + this.valueSource = valueSource; + this.vsContext = context; + values = new ArrayList(initialSize); + } + + public List getHeldGroups() { + return values; + } + + public int getHeldGroupsSize() { + return values.size(); + } + + /** Fetches the DocValues of the new segment; the doc based methods of the subclasses read ordinals from it. */ public void prepareForNextSegment(IndexReader.AtomicReaderContext readerContext) throws IOException { + docValues = valueSource.getValues(vsContext, readerContext); + } + + + // requires more memory, but faster: keeps a Group array with one slot per ordinal of the current segment + // Meant for accessing a variable number of groups + static class Variable extends TermGroupHolder { + + private Group[] segmentValues; + + Variable(ValueSource valueSource, Map context, int initialSize) { + super(valueSource, context, initialSize); + } + + /* NOTE(review): segmentValues is indexed directly with the ordinal, so this assumes prepareForNextSegment has run and that ordVal never returns a negative number; confirm. */ @SuppressWarnings({"unchecked"}) + public Group get(int doc) { + int ord = docValues.ordVal(doc); + return segmentValues[ord]; + } + + public boolean exists(int doc) { + int ord = docValues.ordVal(doc); + return segmentValues[ord] != null; + } + + /* Stores value under the ordinal of doc and appends it to the held list. NOTE(review): the append is unconditional, so setting the same group twice holds it twice; callers presumably check exists first. */ public void set(int doc, Group value) { + int ord = docValues.ordVal(doc); + segmentValues[ord] = value; + values.add(value); + } + + public 
void add(Group value) { /* only appended to the held list; get and exists see it once prepareForNextSegment has mapped it to an ordinal */ + values.add(value); + } + + /* Clears the slot for the ordinal of doc, then removes that group from the held list; values is an ArrayList, so the removal is a linear scan. */ public boolean remove(int doc) { + int ord = docValues.ordVal(doc); + Group value = segmentValues[ord]; + segmentValues[ord] = null; + return values.remove(value); + } + + public void clear() { + values.clear(); + segmentValues = null; + } + + /* Allocates a fresh slot array sized by numOrd of the new segment and re-maps every held group to its ordinal there; groups whose lookup yields a negative ordinal stay unmapped for this segment. */ public void prepareForNextSegment(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.prepareForNextSegment(readerContext); + segmentValues = new Group[docValues.numOrd()]; + + for (Group group : values) { + int ord = docValues.ord(group.getValue()); + if (ord >= 0) { + segmentValues[ord] = group; + } + + } + } + + } + + // Requires less memory, but slower: ordinals are looked up in a SentinelIntSet instead of a per ordinal array + // Meant for accessing a fixed number of groups + static class Fixed extends TermGroupHolder { + + private final SentinelIntSet ordSet; + private final Group[] groupDocs; + + /* NOTE(review): groupDocs is sized once from ordSet.keys.length; if ordSet.put ever grows keys, the slots it returns could fall outside groupDocs. Presumably initialSize bounds the number of held groups; confirm. */ Fixed(ValueSource valueSource, Map context, int initialSize) { + super(valueSource, context, initialSize); + ordSet = new SentinelIntSet(initialSize, -1); + groupDocs = new Group[ordSet.keys.length]; + } + + public boolean exists(int doc) { + int slot = ordSet.find(docValues.ordVal(doc)); + return slot >= 0; + } + + @SuppressWarnings({"unchecked"}) + public Group get(int doc) { + int slot = ordSet.find(docValues.ordVal(doc)); + if (slot < 0) { + return null; + } + + return groupDocs[slot]; + } + + public void set(int doc, Group value) { + throw new UnsupportedOperationException(); + } + + /* Only appends to the held list; the ordinal set is filled in prepareForNextSegment, so get and exists do not see the group before that call. */ public void add(Group value) { + values.add(value); + } + + public boolean remove(int doc) { + throw new UnsupportedOperationException(); + } + + public void clear() { + throw new UnsupportedOperationException(); + } + + /* Empties the ordinal set and registers every held group under its ordinal in the new segment. A null group is registered under ordinal 0. NOTE(review): presumably ordinal 0 stands for documents without a value; confirm. */ public void prepareForNextSegment(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.prepareForNextSegment(readerContext); + + ordSet.clear(); + for (Group group : values) { + int ord = group == null ? 
0 : docValues.ord(group.getValue()); + if (ord >= 0) { + groupDocs[ordSet.put(ord)] = group; + } + } + } + + } + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java (revision ) @@ -0,0 +1,32 @@ +package org.apache.lucene.search.grouping.research; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.search.grouping.groupholder.Group; +import org.apache.lucene.util.mutable.MutableValue; + +/** + * A {@link Group} that exposes its group value and sort values as public fields. + * + * @lucene.experimental + */ +public class ResearchSearchGroup extends Group { + + /** The value identifying this group; returned by {@link #getValue()}. */ public MutableValue groupValue; + /** Sort values of this group. NOTE(review): presumably one entry per sort field and left unset unless the producer fills sort fields; confirm against the first pass collector. */ public Comparable[] sortValues; + + /** Returns the groupValue field. */ public MutableValue getValue() { + return groupValue; + } +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GeneralGroupHolder.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GeneralGroupHolder.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GeneralGroupHolder.java (revision ) @@ -0,0 +1,86 @@ +package org.apache.lucene.search.grouping.groupholder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.mutable.MutableValue; + +import java.io.IOException; +import java.util.*; + +/** + * A general {@link GroupHolder} implementation that works for any group. 
+ */ +public class GeneralGroupHolder implements GroupHolder { + + /* Held groups keyed by their group value. The map is a TreeMap built without a comparator, so keys are ordered by their natural ordering and must be mutually comparable. */ private final NavigableMap groups = new TreeMap(); + private final ValueSource valueSource; + private final Map vsContext; + + /* Set per segment in prepareForNextSegment; mval is the value object that filler overwrites for each document. */ private DocValues.ValueFiller filler; + private MutableValue mval; + + public GeneralGroupHolder(ValueSource valueSource, Map vsContext) { + this.valueSource = valueSource; + this.vsContext = vsContext; + } + + /* Fills mval with the value of doc and checks whether a group is held under it. Requires prepareForNextSegment to have been called, because filler is dereferenced. */ public boolean exists(int doc) { + filler.fillValue(doc); + return groups.containsKey(mval); + } + + @SuppressWarnings({"unchecked"}) + public Group get(int doc) { + filler.fillValue(doc); + return groups.get(mval); + } + + /* The doc argument is not used: the key is taken from value.getValue(). NOTE(review): the key object is stored by reference, so the caller presumably has to pass a value that is not modified afterwards; confirm at call sites. */ public void set(int doc, Group value) { + groups.put(value.getValue(), value); + } + + /* Same effect as set without a doc: the group is stored under its own value. */ public void add(Group value) { + groups.put(value.getValue(), value); + } + + /* Fills mval with the value of doc and removes the group held under it; returns true when a group was actually removed. */ public boolean remove(int doc) { + filler.fillValue(doc); + return groups.remove(mval) != null; + } + + public void clear() { + groups.clear(); + } + + /* Returns the values view of the internal map, not a copy. */ public Collection getHeldGroups() { + return groups.values(); + } + + public int getHeldGroupsSize() { + return groups.size(); + } + + /* Obtains the DocValues of the new segment and refreshes filler and mval from it; the held groups themselves are left untouched. */ public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException { + DocValues docValues = valueSource.getValues(vsContext, context); + filler = docValues.getValueFiller(); + mval = filler.getValue(); + } +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/Group.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/Group.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/Group.java (revision ) @@ -0,0 +1,32 @@ +package org.apache.lucene.search.grouping.groupholder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.util.mutable.MutableValue; + +/** + * Base class for a group; a group is identified by the value returned from {@link #getValue()}. + */ +public abstract class Group { + + /** + * @return The unique value representing this group + */ + public abstract MutableValue getValue(); + +} Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java (revision 1178182) +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java (revision ) @@ -23,9 +23,11 @@ import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.IntDocValues; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueInt; +import org.apache.lucene.util.mutable.MutableValueStr; import java.io.IOException; import java.util.Map; @@ -78,7 +80,17 @@ public int ordVal(int doc) { return sindex.getOrd(doc+off); } + + + /* Scratch BytesRef handed to binarySearchLookup by ord. */ private final BytesRef spare = new BytesRef(); + /** Looks up the given value in sindex and returns the lookup result. The value is cast to MutableValueStr, so any other MutableValue type fails with a ClassCastException. NOTE(review): presumably a negative result means the term is not present; confirm against binarySearchLookup. */ @Override + public int ord(MutableValue value) { + BytesRef rawValue = ((MutableValueStr) value).value; + return sindex.binarySearchLookup(rawValue, spare); + } + + 
@Override public int numOrd() { return sindex.numOrd(); } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GroupHolder.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GroupHolder.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/groupholder/GroupHolder.java (revision ) @@ -0,0 +1,86 @@ +package org.apache.lucene.search.grouping.groupholder; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.mutable.MutableValue; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +/** + * A GroupHolder is responsible for holding {@link Group} instances + * and for allowing efficient lookup and removal via the internal Lucene id. + *

+ * The Lucene id is translated into the group value, so multiple documents can point to the same group value. + * There can be a {@link Group} for each unique group. + */ +public interface GroupHolder { + + /** + * Returns whether a {@link Group} has been set for the specified document. + * + * @param doc The specified document + * @return whether a {@link Group} has been set for the specified document + */ + boolean exists(int doc); + + /** + * Returns the current {@link Group} the specified doc belongs to or null if for the specified doc no + * {@link Group} could be found. + * + * @param doc The doc to retrieve the {@link Group} for + * @return the current {@link Group} the specified doc belongs to + */ + T get(int doc); + + /** + * Sets the specified {@link Group} to be associated with its group value that is looked up via the specified + * Lucene doc id. Implementations may throw UnsupportedOperationException. + * + * @param doc The Lucene id used to translate to the group value + * @param value The current {@link Group} for the group value + */ + void set(int doc, Group value); + + /** Adds the specified {@link Group} without a document id. NOTE(review): the term based holders in this patch only append the group to the held list here, so doc based lookups find it after the next prepareForNextSegment call; confirm this is the intended contract. */ void add(Group value); + + /** + * Removes any {@link Group} associated with its group value that is looked up via the Lucene id. + * Implementations may throw UnsupportedOperationException. + * + * @param doc The lucene id used to lookup the group value + * @return whether the {@link Group} for the specified doc was removed + */ + boolean remove(int doc); + + /** Removes all held groups. Implementations may throw UnsupportedOperationException when clearing is not supported. */ void clear(); + + /** + * Returns the {@link Group} instances put into this holder via {@link #set(int, Group)} or {@link #add(Group)}. + * The size of the returned collection is expected to equal {@link #getHeldGroupsSize()}. 
+ * + * @return the {@link Group} instances being put into this holder + */ + Collection getHeldGroups(); + + /** Returns the number of groups currently held. */ int getHeldGroupsSize(); + + /** Prepares this holder for the given segment. The implementations in this patch load the segment DocValues here, so this has to be called before any doc based method is used on a new segment. */ void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException; + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchAllGroupsCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchAllGroupsCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchAllGroupsCollector.java (revision ) @@ -0,0 +1,127 @@ +package org.apache.lucene.search.grouping.research; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.grouping.groupholder.Group; +import org.apache.lucene.search.grouping.groupholder.GroupHolder; +import org.apache.lucene.util.mutable.MutableValue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +/** + * A collector that collects all groups that match the + * query. Only the group value is collected, and the order + * is undefined. This collector does not determine + * the most relevant document of a group. + * + *

+ * Implementation detail: the supplied {@link GroupHolder} + * is used to detect if a group is already added to the + * total count. For each segment the holder is asked to prepare + * itself via prepareForNextSegment before documents of that + * segment are collected. + * + * @lucene.experimental + */ +public class ResearchAllGroupsCollector extends Collector { + + private final GroupHolder groupHolder; + private final ValueSource valueSource; + private final Map vsContext; + + /* Set per segment in setNextReader; mval is the value object that filler overwrites for each document. */ private DocValues.ValueFiller filler; + private MutableValue mval; + + public ResearchAllGroupsCollector(ValueSource valueSource, GroupHolder groupHolder, Map vsContext) { + this.groupHolder = groupHolder; + this.valueSource = valueSource; + this.vsContext = vsContext; + } + + /* Intentionally empty: the scorer is not used by this collector. */ public void setScorer(Scorer scorer) throws IOException { + } + + /* Skips documents whose group is already held. Otherwise the value of the document is filled into mval and a duplicate of it is stored as a new group, because mval itself is overwritten by the next fillValue call. */ public void collect(int doc) throws IOException { + if (groupHolder.exists(doc)) { + return; + } + + filler.fillValue(doc); + MutableValue groupValue = mval.duplicate(); + groupHolder.set(doc, new GroupImpl(groupValue)); + } + + /** + * Returns the total number of groups for the executed search. + * This is a convenience method. The following code snippet has the same effect:

getGroups().size()
+ * + * @return The total number of groups for the executed search + */ + public int getGroupCount() { + return groupHolder.getHeldGroupsSize(); + } + + /** + * Returns the group values + *

+ * This is an unordered collection of group values. For each group that matched the query there is a {@link MutableValue} + * representing a group value. The returned list is newly built on every call. + * + * @return the group values + */ + public Collection getGroups() { + List groups = new ArrayList(); + for (Group group : groupHolder.getHeldGroups()) { + groups.add(group.getValue()); + } + return groups; + } + + /* Refreshes filler and mval from the DocValues of the new segment and lets the holder prepare itself for that segment. */ public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException { + DocValues docValues = valueSource.getValues(vsContext, context); + filler = docValues.getValueFiller(); + mval = filler.getValue(); + groupHolder.prepareForNextSegment(context); + } + + public boolean acceptsDocsOutOfOrder() { + return true; + } + + /* Minimal Group that only carries the group value handed to its constructor. */ private static final class GroupImpl extends Group { + + private final MutableValue groupValue; + + private GroupImpl(MutableValue groupValue) { + this.groupValue = groupValue; + } + + public MutableValue getValue() { + return groupValue; + } + } +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSecondPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSecondPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSecondPassGroupingCollector.java (revision ) @@ -0,0 +1,180 @@ +package org.apache.lucene.search.grouping.research; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.*; +import org.apache.lucene.search.grouping.GroupDocs; +import org.apache.lucene.search.grouping.TopGroups; +import org.apache.lucene.search.grouping.groupholder.Group; +import org.apache.lucene.search.grouping.groupholder.GroupHolder; +import org.apache.lucene.util.mutable.MutableValue; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * ResearchSecondPassGroupingCollector is the second of two passes + * necessary to collect grouped docs. This pass gathers the + * top N documents per top group computed from the + * first pass. Concrete subclasses define what a group is and how it + * is internally collected. + * + *

See {@link org.apache.lucene.search.grouping} for more + * details including a full code example.

+ * + * @lucene.experimental + */ +public class ResearchSecondPassGroupingCollector extends Collector { + + protected final Map secondPhaseGroups; + private final int maxDocsPerGroup; + private final Collection groups; + private final Sort withinGroupSort; + private final Sort groupSort; + private final GroupHolder groupHolder; + + private int totalHitCount; + private int totalGroupedHitCount; + + /** + * Creates the collector: for every first-phase group a TopDocsCollector is created, stored in secondPhaseGroups under the group value and added to the given holder. + * @param firstPhaseGroups the groups to collect documents for; must not be empty, otherwise an IllegalArgumentException is thrown + * @param groupHolder An empty GroupHolder. The holder will be filled based on the specified firstPhaseGroups. + * @param groupSort the sort whose fields are passed to the TopGroups returned by getTopGroups + * @param withinGroupSort the sort used inside each group; null means the documents of a group are sorted by score + * @param maxDocsPerGroup the number of hits each per-group collector is created with + * @param getScores forwarded to TopFieldCollector.create; not used when withinGroupSort is null + * @param getMaxScores forwarded to TopFieldCollector.create; not used when withinGroupSort is null + * @param fillSortFields forwarded to TopFieldCollector.create; not used when withinGroupSort is null + * @throws IOException presumably when creating a per-group collector fails - TODO confirm + */ + public ResearchSecondPassGroupingCollector(Collection firstPhaseGroups, + GroupHolder groupHolder, + Sort groupSort, + Sort withinGroupSort, + int maxDocsPerGroup, + boolean getScores, + boolean getMaxScores, + boolean fillSortFields) throws IOException { + //System.out.println("SP init"); + if (firstPhaseGroups.size() == 0) { + throw new IllegalArgumentException("no groups to collect (groups.size() is 0)"); + } + + this.groupSort = groupSort; + this.withinGroupSort = withinGroupSort; + this.groups = firstPhaseGroups; + this.maxDocsPerGroup = maxDocsPerGroup; + this.groupHolder = groupHolder; + secondPhaseGroups = new HashMap(firstPhaseGroups.size()); + + for (ResearchSearchGroup firstPhaseGroup : firstPhaseGroups) { + //System.out.println(" prep group=" + (group.groupValue == null ?
"null" : group.groupValue.utf8ToString())); + final TopDocsCollector collector; + if (withinGroupSort == null) { + // Sort by score + collector = TopScoreDocCollector.create(maxDocsPerGroup, true); + } else { + // Sort by fields + collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true); + } + SearchGroupDocs secondPhaseGroup = new SearchGroupDocs(firstPhaseGroup.groupValue, collector); + secondPhaseGroups.put(firstPhaseGroup.groupValue, secondPhaseGroup); + groupHolder.add(secondPhaseGroup); + } + } + + @Override + public void setScorer(Scorer scorer) throws IOException { + for (SearchGroupDocs group : secondPhaseGroups.values()) { + group.collector.setScorer(scorer); + } + } + + @Override + public void collect(int doc) throws IOException { /* Every hit is counted; only hits for which the holder returns a group are counted as grouped and forwarded to that group's collector. */ + totalHitCount++; + SearchGroupDocs group = groupHolder.get(doc); + if (group != null) { + totalGroupedHitCount++; + group.collector.collect(doc); + } + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + //System.out.println("SP.setNextReader"); + groupHolder.prepareForNextSegment(readerContext); + for (SearchGroupDocs group : secondPhaseGroups.values()) { + group.collector.setNextReader(readerContext); + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { /* True only if every per-group collector accepts out-of-order documents. */ + for (SearchGroupDocs group : secondPhaseGroups.values()) { + if (!group.collector.acceptsDocsOutOfOrder()) { + return false; + } + } + return true; + } + + public TopGroups getTopGroups(int withinGroupOffset) { /* Builds one GroupDocs per first-phase group, in the iteration order of that collection, from the top documents of the group's collector. */ + @SuppressWarnings("unchecked") + final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()]; + + int groupIDX = 0; + for(ResearchSearchGroup group : groups) { + final SearchGroupDocs groupDocs = secondPhaseGroups.get(group.groupValue); + final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup); + groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(), + topDocs.totalHits, +
topDocs.scoreDocs, + groupDocs.groupValue, + group.sortValues); + } + + return new TopGroups(groupSort.getSort(), + withinGroupSort == null ? null : withinGroupSort.getSort(), + totalHitCount, totalGroupedHitCount, groupDocsResult); + } + + + // TODO: merge with SearchGroup or not? + // ad: don't need to build a new hashmap + // disad: blows up the size of SearchGroup if we need many of them, and couples implementations + public class SearchGroupDocs extends Group { + + public final MutableValue groupValue; + public final TopDocsCollector collector; + + public SearchGroupDocs(MutableValue groupValue, TopDocsCollector collector) { + this.groupValue = groupValue; + this.collector = collector; + } + + @Override + public MutableValue getValue() { + return groupValue; + } + } +} Index: modules/queries/src/java/org/apache/lucene/queries/function/docvalues/ByteRefIndexDocDV.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/docvalues/ByteRefIndexDocDV.java (revision ) +++ modules/queries/src/java/org/apache/lucene/queries/function/docvalues/ByteRefIndexDocDV.java (revision ) @@ -0,0 +1,136 @@ +package org.apache.lucene.queries.function.docvalues; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.values.IndexDocValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.mutable.MutableValue; +import org.apache.lucene.util.mutable.MutableValueStr; + +import java.io.IOException; + +/** + * Base class for {@link DocValues} that read per-document values from an {@link IndexDocValues.Source}; {@link #create} currently only supports the byte based value types and throws UnsupportedOperationException for the int and float types. + */ +public abstract class ByteRefIndexDocDV extends DocValues { + + protected final ValueSource vs; + protected final IndexDocValues.Source source; + + protected ByteRefIndexDocDV(ValueSource vs, IndexReader.AtomicReaderContext context, String field) throws IOException { + this.vs = vs; + this.source = context.reader.perDocValues().docValues(field).getSource(); + } + + @Override + public String toString(int doc) { + return vs.description() + '=' + strVal(doc); + } + + public static ByteRefIndexDocDV create(ValueSource vs, + IndexReader.AtomicReaderContext readerContext, + String field, + ValueType type) throws IOException { + switch (type) { + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + case FIXED_INTS_8: + case VAR_INTS: + throw new UnsupportedOperationException("Not implemented yet"); +// return new I(field); + case FLOAT_32: + case FLOAT_64: + throw new UnsupportedOperationException("Not implemented yet"); +// return new F(field); + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_VAR_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + return new BR(vs, readerContext, field); + default: + throw new IllegalStateException("unrecognized index values mode " + type); + } + } + + static class BR extends ByteRefIndexDocDV { + + private final BytesRef spare = new BytesRef(); + private final CharsRef
spareChars = new CharsRef(); + + BR(ValueSource vs, IndexReader.AtomicReaderContext context, String field) throws IOException { + super(vs, context, field); + } + + @Override + public boolean exists(int doc) { + return source.getBytes(doc, spare).length != 0; /* BytesRef.length is the size of the value; bytes.length is only the capacity of the backing array */ + } + + @Override + public boolean bytesVal(int doc, BytesRef target) { + source.getBytes(doc, target); + return target.length != 0; /* test the valid length, not the backing array */ + } + + @Override + public boolean boolVal(int doc) { + return exists(doc); + } + + @Override + public int numOrd() { + return source.getValueCount(); + } + + @Override + public String strVal(int doc) { + source.getBytes(doc, spare); + spare.utf8ToChars(spareChars); + return spareChars.toString(); + } + + @Override + public ValueFiller getValueFiller() { + return new ValueFiller() { + + private final MutableValueStr spare = new MutableValueStr(); + + @Override + public MutableValue getValue() { + return spare; + } + + @Override + public void fillValue(int doc) { + source.getBytes(doc, spare.value); + spare.exists = spare.value.length != 0; /* an empty value (length 0) is reported as missing */ + } + }; + } + + } + +} Index: modules/grouping/build.xml =================================================================== --- modules/grouping/build.xml (revision 1178182) +++ modules/grouping/build.xml (revision ) @@ -18,8 +18,9 @@ --> + - Collectors for grouping search results + Grouping module.
Collectors for grouping search results @@ -29,5 +30,18 @@ + + + + + + + + + + + + + Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteRefIndexValueSource.java =================================================================== --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteRefIndexValueSource.java (revision ) +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteRefIndexValueSource.java (revision ) @@ -0,0 +1,71 @@ +package org.apache.lucene.queries.function.valuesource; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.queries.function.DocValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.docvalues.ByteRefIndexDocDV; + +import java.io.IOException; +import java.util.Map; + +/** + * A {@link ValueSource} for a field and a {@link ValueType}; the per-segment values are created by {@link ByteRefIndexDocDV#create}. Two instances are equal when both the field and the type are equal. + */ +public class ByteRefIndexValueSource extends ValueSource { + + private final String field; + private final ValueType type; + + public ByteRefIndexValueSource(String field, ValueType type) { + this.field = field; + this.type = type; + } + + @Override + public DocValues getValues(Map context, IndexReader.AtomicReaderContext readerContext) throws IOException { /* The context map is not used; everything is delegated to the factory. */ + return ByteRefIndexDocDV.create(this, readerContext, field, type); + } + + @Override + public String description() { /* The description is just the field name. */ + return field; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ByteRefIndexValueSource that = (ByteRefIndexValueSource) o; + + if (field != null ? !field.equals(that.field) : that.field != null) return false; + if (type != that.type) return false; + + return true; + } + + @Override + public int hashCode() { /* Combines the same two fields that equals compares; null fields hash to 0. */ + int result = field != null ? field.hashCode() : 0; + result = 31 * result + (type != null ? type.hashCode() : 0); + return result; + } +}