Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupHolder.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupHolder.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupHolder.java (revision ) @@ -0,0 +1,383 @@ +package org.apache.lucene.search.grouping.infrastructure.groupfield; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.grouping.SentinelIntSet;
+import org.apache.lucene.search.grouping.infrastructure.GroupHolder;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.FieldGroupValue.*;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.GroupFieldValueSource.*;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Base for {@link GroupHolder} implementations backed by a group field; the nested classes keep the held groups in a list plus a per-segment lookup (by doc id for primitive fields, by term ordinal for {@code BytesRef} terms).
+ */
+public abstract class FieldGroupHolder implements GroupHolder {
+
+  protected final GFVS groupValueSource; // NOTE(review): GFVS has no visible declaration; the type parameter list seems to be missing from this copy
+
+  protected FieldGroupHolder(GFVS groupValueSource) {
+    this.groupValueSource = groupValueSource;
+  }
+
+  public boolean remove(int doc) { // always throws; none of the nested holders below override it
+    throw new UnsupportedOperationException();
+  }
+
+  static class ByteFieldGroupHolder extends FieldGroupHolder { // holder for byte-valued group fields
+
+    private final List groups; // every value passed to set(), in insertion order
+    private ByteFieldGroupValue[] segmentValues; // indexed by doc id; reallocated to maxDoc for each segment
+    private OpenBitSet docSet; // bit set for each doc id that has an entry in segmentValues
+
+    ByteFieldGroupHolder(ByteGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public ByteFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, ByteFieldGroupValue value) { // marks only this doc id and appends value to the held groups
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() { // returns the internal list itself, not a copy
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final byte [] values = FieldCache.DEFAULT.getBytes(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new ByteFieldGroupValue[context.reader.maxDoc()];
+      for (ByteFieldGroupValue group : groups) {
+        // NOTE(review): Arrays.binarySearch requires sorted input and returns one position, which is used as a doc id below.
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // presumably values is in doc-id order, not sorted - confirm
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class ShortFieldGroupHolder extends FieldGroupHolder { // holder for short-valued group fields; same layout as ByteFieldGroupHolder
+
+    private final List groups;
+    private ShortFieldGroupValue[] segmentValues;
+    private OpenBitSet docSet;
+
+    ShortFieldGroupHolder(ShortGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public ShortFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, ShortFieldGroupValue value) {
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() {
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final short[] values = FieldCache.DEFAULT.getShorts(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new ShortFieldGroupValue[context.reader.maxDoc()];
+      for (ShortFieldGroupValue group : groups) {
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // NOTE(review): same sorted-input concern as in ByteFieldGroupHolder
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class IntFieldGroupHolder extends FieldGroupHolder { // holder for int-valued group fields; same layout as ByteFieldGroupHolder
+
+    private final List groups;
+    private IntFieldGroupValue[] segmentValues;
+    private OpenBitSet docSet;
+
+    IntFieldGroupHolder(IntGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public IntFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, IntFieldGroupValue value) {
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() {
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final int[] values = FieldCache.DEFAULT.getInts(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new IntFieldGroupValue[context.reader.maxDoc()];
+      for (IntFieldGroupValue group : groups) {
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // NOTE(review): same sorted-input concern as in ByteFieldGroupHolder
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class LongFieldGroupHolder extends FieldGroupHolder { // holder for long-valued group fields; same layout as ByteFieldGroupHolder
+
+    private final List groups;
+    private LongFieldGroupValue[] segmentValues;
+    private OpenBitSet docSet;
+
+    LongFieldGroupHolder(LongGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public LongFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, LongFieldGroupValue value) {
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() {
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final long [] values = FieldCache.DEFAULT.getLongs(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new LongFieldGroupValue[context.reader.maxDoc()];
+      for (LongFieldGroupValue group : groups) {
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // NOTE(review): same sorted-input concern as in ByteFieldGroupHolder
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class FloatFieldGroupHolder extends FieldGroupHolder { // holder for float-valued group fields; same layout as ByteFieldGroupHolder
+
+    private final List groups;
+    private FloatFieldGroupValue[] segmentValues;
+    private OpenBitSet docSet;
+
+    FloatFieldGroupHolder(FloatGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public FloatFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, FloatFieldGroupValue value) {
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() {
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final float[] values = FieldCache.DEFAULT.getFloats(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new FloatFieldGroupValue[context.reader.maxDoc()];
+      for (FloatFieldGroupValue group : groups) {
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // NOTE(review): same sorted-input concern as in ByteFieldGroupHolder
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class DoubleFieldGroupHolder extends FieldGroupHolder { // holder for double-valued group fields; same layout as ByteFieldGroupHolder
+
+    private final List groups;
+    private DoubleFieldGroupValue[] segmentValues;
+    private OpenBitSet docSet;
+
+    DoubleFieldGroupHolder(DoubleGroupFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      groups = new ArrayList(initialSize);
+    }
+
+    public boolean exists(int doc) {
+      return docSet.fastGet(doc);
+    }
+
+    public DoubleFieldGroupValue get(int doc) {
+      return segmentValues[doc];
+    }
+
+    public void set(int doc, DoubleFieldGroupValue value) {
+      segmentValues[doc] = value;
+      docSet.fastSet(doc);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() {
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final double[] values = FieldCache.DEFAULT.getDoubles(context.reader, groupValueSource.fieldName);
+      docSet = new OpenBitSet(context.reader.maxDoc());
+      segmentValues = new DoubleFieldGroupValue[context.reader.maxDoc()];
+      for (DoubleFieldGroupValue group : groups) {
+        final int index = Arrays.binarySearch(values, group.getConcreteValue()); // NOTE(review): same sorted-input concern as in ByteFieldGroupHolder
+        if (index >= 0) {
+          docSet.fastSet(index);
+          segmentValues[index] = group;
+        }
+      }
+    }
+  }
+
+  static class DocTermsIndexFieldGroupHolder extends FieldGroupHolder { // holder keyed by term ordinal instead of doc id
+
+    private final SentinelIntSet ordSet; // ordinals, in the current segment, of the held groups
+    private final List groups; // every value passed to set(), in insertion order
+    private final BytesRef spareBytesRef; // scratch instance handed to binarySearchLookup
+
+    private BytesRefFieldGroupValue[] segmentValues; // indexed by term ordinal; reallocated to numOrd() for each segment
+
+    DocTermsIndexFieldGroupHolder(DocTermsIndexFieldValueSource groupValueSource, int initialSize) {
+      super(groupValueSource);
+      ordSet = new SentinelIntSet(initialSize, -1);
+      groups = new ArrayList(initialSize);
+      spareBytesRef = new BytesRef();
+    }
+
+    public boolean exists(int doc) { // membership test on the doc's ordinal as reported by the value source
+      return ordSet.exists(groupValueSource.getValueOrd(doc));
+    }
+
+    public BytesRefFieldGroupValue get(int doc) {
+      return segmentValues[groupValueSource.getValueOrd(doc)];
+    }
+
+    public void set(int doc, BytesRefFieldGroupValue value) {
+      final int key = groupValueSource.getValueOrd(doc);
+      segmentValues[key] = value;
+      ordSet.put(key);
+      groups.add(value);
+    }
+
+    public List getHeldGroups() { // returns the internal list itself, not a copy
+      return groups;
+    }
+
+    public int getHeldGroupsSize() {
+      return groups.size();
+    }
+
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      final FieldCache.DocTermsIndex index = FieldCache.DEFAULT.getTermsIndex(context.reader, groupValueSource.fieldName);
+      segmentValues = new FieldGroupValue.BytesRefFieldGroupValue[index.numOrd()];
+      ordSet.clear();
+      for (BytesRefFieldGroupValue group : groups) {
+        final int ord = index.binarySearchLookup(group.getConcreteValue(), spareBytesRef); // look up the held group's term in the new segment
+        if (ord >= 0) { // a negative ord is skipped, i.e. treated as "term not in this segment"
+          ordSet.put(ord);
+          segmentValues[ord] = group;
+        }
+      }
+    }
+  }
+
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchGroupRunner.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchGroupRunner.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchGroupRunner.java (revision )
@@ -0,0 +1,237 @@
+package org.apache.lucene.search.grouping.research;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.AllGroupsCollector;
+import org.apache.lucene.search.grouping.FirstPassGroupingCollector;
+import org.apache.lucene.search.grouping.SearchGroup;
+import org.apache.lucene.search.grouping.infrastructure.GroupHolder;
+import org.apache.lucene.search.grouping.infrastructure.GroupValueSource;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.FieldGroupSpecification;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.FieldType;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+import java.io.File;
+import java.io.IOException;
+import java.security.SecureRandom;
+import java.util.Collection;
+import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+/** Ad-hoc benchmark: runs randomly picked queries against the index in OPTION_DATA_DIR and prints time and memory rows for the stock grouping collectors next to the research ones. */
+public class ResearchGroupRunner {
+
+  // options
+  private final static int OPTION_CONCURRENT_SEARCHES = 1; // worker threads started by each executeSearch call
+  private final static int OPTION_NUMBER_SEARCHES = 50; // searches per worker, issued as pairs (callback1, callback2)
+  private final static int OPTION_INITIALSIZE = 7502; // initial size handed to the all-groups collectors / group holder
+  private static final String OPTION_DATA_DIR = "/temp1/solr-travel/travel/data/index-4.0";
+  private final static String OPTION_GROUP_BY_FIELD = "acco_id";
+  private final static Sort OPTION_SORT = new Sort(new SortField("price", SortField.INT));
+
+
+  private final static Random random = new SecureRandom();
+  private final static Query[] queries;
+
+  static {
+    TermQuery es = new TermQuery(new Term("country", "es"));
+    TermQuery gr = new TermQuery(new Term("country", "gr")); // was "eg", which contradicted the variable name and esOrGr
+    BooleanQuery esOrGr = new BooleanQuery();
+    esOrGr.add(es, BooleanClause.Occur.SHOULD);
+    esOrGr.add(gr, BooleanClause.Occur.SHOULD);
+    queries = new Query[]{
+        new TermQuery(new Term("country", "es")),
+        new TermQuery(new Term("country", "us")),
+        new TermQuery(new Term("country", "it")),
+        es,
+        new TermQuery(new Term("country", "tr")),
+        new TermQuery(new Term("country", "gr")),
+        gr,
+        new TermQuery(new Term("country", "eg")),
+        new WildcardQuery(new Term("country", "e*")),
+        esOrGr,
+        new MatchAllDocsQuery()
+    };
+  }
+  /** Opens the index, runs five warm-up searches, then prints the first-pass comparison followed by the all-groups comparison. */
+  public static void main(String[] args) throws Exception {
+    final Directory directory = FSDirectory.open(new File(OPTION_DATA_DIR));
+    final IndexSearcher indexSearcher = new IndexSearcher(directory);
+
+    // warm
+    for (int i = 0; i < 5; i++) {
+      warmFirstResearchPassSearch(indexSearcher);
+    }
+
+    // callbacks
+    SearchCallback searchWithFirstPassCollector = new SearchCallback() {
+      public void callback(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+        firstPassSearch(i, query, indexSearcher, OPTION_SORT);
+      }
+    };
+    SearchCallback searchWithResearchFirstPassCollector = new SearchCallback() {
+      public void callback(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+        firstResearchPassSearch(i, query, indexSearcher, OPTION_SORT);
+      }
+    };
+    SearchCallback searchWithMatchAllCollector = new SearchCallback() {
+      public void callback(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+        searchWithMatchAllCollector(i, query, indexSearcher);
+      }
+    };
+    SearchCallback searchWithResearchMatchAllCollector = new SearchCallback() {
+      public void callback(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+        searchWithResearchMatchAllCollector(i, query, indexSearcher);
+      }
+    };
+
+
+    System.out.println("===== First pass collectors execution =====");
+    printHeadersForTopGroups();
+    executeSearch(indexSearcher, searchWithFirstPassCollector, searchWithResearchFirstPassCollector);
+    System.out.println("\n\n\n===== All groups collectors execution =====");
+    printHeadersForCount();
+    executeSearch(indexSearcher, searchWithMatchAllCollector, searchWithResearchMatchAllCollector);
+  }
+  /** Runs the paired-search loop on OPTION_CONCURRENT_SEARCHES threads and blocks until every one of them has finished; each pair uses the same random query for both callbacks. */
+  private static void executeSearch(final IndexSearcher indexSearcher, final SearchCallback callback1, final SearchCallback callback2) throws Exception {
+    final CountDownLatch latch = new CountDownLatch(OPTION_CONCURRENT_SEARCHES);
+    Runnable r = new Runnable() {
+      public void run() {
+        try {
+          for (int i = 1; i <= OPTION_NUMBER_SEARCHES; i+=2) {
+            Query query = queries[random.nextInt(queries.length)];
+            callback1.callback(i, query, indexSearcher);
+            callback2.callback(i + 1, query, indexSearcher);
+          }
+        } catch (Exception e) {
+          e.printStackTrace();
+        } finally {
+          latch.countDown();
+        }
+      }
+    };
+
+    // One worker per latch count: starting fewer threads than the latch expects would leave await() blocked forever.
+    for (int t = 0; t < OPTION_CONCURRENT_SEARCHES; t++) {
+      new Thread(r).start();
+    }
+    latch.await();
+  }
+
+
+  //====================================== FirstPassCollectors =========================================================
+  /** Times one FirstPassGroupingCollector search (top 10 groups) and prints a stats row with the group values found. */
+  private static void firstPassSearch(int i, Query query, IndexSearcher indexSearcher, Sort sort) throws IOException {
+    long startTime = System.currentTimeMillis();
+    FirstPassGroupingCollector first = new FirstPassGroupingCollector(OPTION_GROUP_BY_FIELD, sort, 10);
+    indexSearcher.search(query, first);
+    Collection<SearchGroup> groups = first.getTopGroups(0, false);
+    long timeTaken = System.currentTimeMillis() - startTime;
+    long mem = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / (1024 * 1024);
+    StringBuilder builder = new StringBuilder();
+    for (SearchGroup group : groups) {
+      builder.append(group.groupValue.utf8ToString()).append(", ");
+    }
+    printStatsForTopGroups(i, first.getClass(), query, builder.toString(), timeTaken, mem);
+  }
+  /** Same measurement as firstPassSearch, but through ResearchFirstPassGroupingCollector with a BYTEREF field specification. */
+  private static void firstResearchPassSearch(int i, Query query, IndexSearcher indexSearcher, Sort sort) throws IOException {
+    long startTime = System.currentTimeMillis();
+    FieldGroupSpecification specification = new FieldGroupSpecification(OPTION_GROUP_BY_FIELD, FieldType.BYTEREF);
+    ResearchFirstPassGroupingCollector first = new ResearchFirstPassGroupingCollector(specification, sort, 10);
+    indexSearcher.search(query, first);
+    Collection<ResearchSearchGroup> groups = first.getTopGroups(0, false);
+    long timeTaken = System.currentTimeMillis() - startTime;
+    long mem = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / (1024 * 1024);
+    StringBuilder builder = new StringBuilder();
+    for (ResearchSearchGroup group : groups) {
+      builder.append(group.groupValue.toReadableString()).append(", ");
+    }
+    printStatsForTopGroups(i, first.getClass(), query, builder.toString(), timeTaken, mem);
+  }
+  /** Runs one random query through ResearchFirstPassGroupingCollector and discards the result. */
+  private static void warmFirstResearchPassSearch(IndexSearcher indexSearcher) throws IOException {
+    FieldGroupSpecification specification = new FieldGroupSpecification(OPTION_GROUP_BY_FIELD, FieldType.BYTEREF);
+    indexSearcher.search(queries[random.nextInt(queries.length)], new ResearchFirstPassGroupingCollector(specification, OPTION_SORT, 10));
+  }
+  /** Prints the column header row matching printStatsForTopGroups. */
+  private static void printHeadersForTopGroups() {
+    System.out.println(String.format("%-9s|%-40s|%-24s|%-12s|%-24s|%s", "number", "collector", "query", "time (ms)", "Current mem usage (MB)", "Groups"));
+  }
+  /** Prints one first-pass result row: search number, collector simple name, query, elapsed ms, used heap in MB, group values. */
+  private static void printStatsForTopGroups(int i, Class collector, Query query, String groupsStr, long timeTaken, long mem) {
+    System.out.println(String.format("%-9d|%-40s|%-24s|%-12d|%-24d|%s", i, collector.getSimpleName(), query, timeTaken, mem, groupsStr));
+  }
+
+
+  //====================================== AllGroupCollectors ==========================================================
+  /** Times one AllGroupsCollector search and prints a stats row with the group count. */
+  private static void searchWithMatchAllCollector(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+    long startTime = System.currentTimeMillis();
+
+    AllGroupsCollector first = new AllGroupsCollector(OPTION_GROUP_BY_FIELD, OPTION_INITIALSIZE);
+    indexSearcher.search(query, first);
+    int count = first.getGroupCount();
+
+    long timeTaken = System.currentTimeMillis() - startTime;
+    long mem = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / (1024 * 1024);
+    printStatsForCount(i, first.getClass(), query, count, timeTaken, mem);
+  }
+  /** Same measurement as searchWithMatchAllCollector, but through ResearchAllGroupsCollector with a value source and holder built from a BYTEREF specification. */
+  private static void searchWithResearchMatchAllCollector(int i, Query query, IndexSearcher indexSearcher) throws IOException {
+    long startTime = System.currentTimeMillis();
+
+    FieldGroupSpecification specification = new FieldGroupSpecification(OPTION_GROUP_BY_FIELD, FieldType.BYTEREF);
+    GroupValueSource groupValueSource = specification.createGroupValueSource();
+    GroupHolder groupHolder = specification.createGroupHolder(groupValueSource, OPTION_INITIALSIZE);
+    ResearchAllGroupsCollector first = new ResearchAllGroupsCollector(groupValueSource, groupHolder);
+    indexSearcher.search(query, first);
+    int count = first.getGroupCount();
+
+    long timeTaken = System.currentTimeMillis() - startTime;
+    long mem = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / (1024 * 1024);
+    printStatsForCount(i, first.getClass(), query, count, timeTaken, mem);
+  }
+  /** Prints the column header row matching printStatsForCount. */
+  private static void printHeadersForCount() {
+    System.out.println(String.format("%-9s|%-30s|%-24s|%-12s|%-12s|%-12s", "number", "collector", "query", "time (ms)", "count", "Current mem usage (MB)"));
+  }
+  /** Prints one all-groups result row: search number, collector simple name, query, elapsed ms, group count, used heap in MB. */
+  private static void printStatsForCount(int i, Class collector, Query query, int count, long timeTaken, long mem) {
+    System.out.println(String.format("%-9d|%-30s|%-24s|%-12d|%-12d|%-12d", i, collector.getSimpleName(), query, timeTaken, count, mem));
+  }
+  /** Runs one random query through ResearchAllGroupsCollector and discards the result; not called from main at present. */
+  private static void warmWithResearchMatchAllCollector(IndexSearcher indexSearcher) throws IOException {
+    FieldGroupSpecification specification = new FieldGroupSpecification(OPTION_GROUP_BY_FIELD, FieldType.BYTEREF);
+    GroupValueSource groupValueSource = specification.createGroupValueSource();
+    GroupHolder groupHolder = specification.createGroupHolder(groupValueSource, OPTION_INITIALSIZE);
+    indexSearcher.search(queries[random.nextInt(queries.length)], new ResearchAllGroupsCollector(groupValueSource, groupHolder));
+  }
+
+
+  // If we only had function pointers.....
+  static abstract class SearchCallback {
+    /** Performs search number {@code i} for {@code query} on {@code indexSearcher}. */
+    public abstract void callback(int i, Query query, IndexSearcher indexSearcher) throws IOException;
+
+  }
+
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValueSource.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValueSource.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValueSource.java (revision )
@@ -0,0 +1,34 @@
+package org.apache.lucene.search.grouping.infrastructure;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * Supplies the {@link GroupValue} of a document and is told about each segment through {@link #prepareForNextSegment}.
+ */
+public interface GroupValueSource {
+
+  GroupValue getValue(int doc, T reuse); // NOTE(review): T has no visible declaration (type parameters seem lost in this copy); 'reuse' is presumably an instance to fill instead of allocating - confirm
+
+  void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException; // presumably invoked before docs of that segment are asked for - confirm against the collectors
+
+  GroupValue getScratchValue(); // NOTE(review): looks like the reusable instance meant to be passed as 'reuse' - confirm
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValue.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValue.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupValue.java (revision )
@@ -0,0 +1,36 @@
+package org.apache.lucene.search.grouping.infrastructure;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A mutable group key: it can be read as an object or a string, overwritten from another instance, duplicated, and compared by value.
+ */
+public interface GroupValue {
+
+  Object getValue(); // the held value as an object (the primitive FieldGroupValue implementations return it boxed)
+
+  String toReadableString(); // printable form of the held value
+
+  void setValue(GroupValue other); // copies other's value into this instance; FieldGroupValue implementations cast other to their own class
+
+  boolean equals(Object other); // redeclared from Object; FieldGroupValue implementations compare class and held value
+
+  int hashCode(); // redeclared from Object; FieldGroupValue implementations derive it from the held value
+
+  GroupValue duplicate(); // FieldGroupValue implementations return a new instance holding the same value
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchSearchGroup.java (revision )
@@ -0,0 +1,26 @@
+package org.apache.lucene.search.grouping.research;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.grouping.infrastructure.GroupValue;
+
+/** Plain holder pairing a {@link GroupValue} with a {@code Comparable[]} of sort values; both fields are public and mutable. @lucene.experimental */
+public class ResearchSearchGroup {
+  public GroupValue groupValue; // the group's key
+  public Comparable[] sortValues; // presumably one entry per sort field of the search - confirm against the collector that fills it
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldType.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldType.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldType.java (revision )
@@ -0,0 +1,33 @@
+package org.apache.lucene.search.grouping.infrastructure.groupfield;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Field types. Used by {@link FieldGroupSpecification} to select the right implementations.
+ */
+public enum FieldType {
+
+  BYTE,
+  SHORT,
+  INT,
+  LONG,
+  FLOAT,
+  DOUBLE,
+  BYTEREF // or just call it String?
+
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupValue.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupValue.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupValue.java (revision )
@@ -0,0 +1,442 @@
+package org.apache.lucene.search.grouping.infrastructure.groupfield;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.grouping.infrastructure.GroupValue;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Base class for group values that wrap the value of a single field.
+ * One nested subclass exists per representation (byte, short, int, long,
+ * float, double and {@link BytesRef}). Each subclass holds one mutable value
+ * that can be overwritten in place, and defines {@code equals} and
+ * {@code hashCode} on that value.
+ */
+public abstract class FieldGroupValue implements GroupValue {
+
+  // Design note: a single generic variant holding a boxed value was considered
+  // instead of one subclass per primitive type, but that would add the overhead
+  // of autoboxing, so the per-type subclasses below are used.
+
+  /** Group value wrapping a primitive {@code byte}. */
+  static class ByteFieldGroupValue extends FieldGroupValue {
+
+    private byte value;
+
+    ByteFieldGroupValue(byte value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Byte.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public byte getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(byte value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code ByteFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((ByteFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new ByteFieldGroupValue(value);
+    }
+
+    /** Returns the decimal string form of the wrapped value. */
+    public String toReadableString() {
+      return Byte.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      // Exact class match is required, not just instanceof.
+      return o != null && o.getClass() == getClass() && ((ByteFieldGroupValue) o).value == value;
+    }
+
+    @Override
+    public int hashCode() {
+      return value;
+    }
+  }
+
+  /** Group value wrapping a primitive {@code short}. */
+  static class ShortFieldGroupValue extends FieldGroupValue {
+
+    private short value;
+
+    ShortFieldGroupValue(short value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Short.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public short getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(short value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code ShortFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((ShortFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new ShortFieldGroupValue(value);
+    }
+
+    /** Returns the decimal string form of the wrapped value. */
+    public String toReadableString() {
+      return Short.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      return o != null && o.getClass() == getClass() && ((ShortFieldGroupValue) o).value == value;
+    }
+
+    @Override
+    public int hashCode() {
+      return value;
+    }
+  }
+
+  /** Group value wrapping a primitive {@code int}. */
+  static class IntFieldGroupValue extends FieldGroupValue {
+
+    private int value;
+
+    IntFieldGroupValue(int value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Integer.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public int getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(int value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code IntFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((IntFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new IntFieldGroupValue(value);
+    }
+
+    /** Returns the decimal string form of the wrapped value. */
+    public String toReadableString() {
+      return Integer.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      return o != null && o.getClass() == getClass() && ((IntFieldGroupValue) o).value == value;
+    }
+
+    @Override
+    public int hashCode() {
+      return value;
+    }
+  }
+
+  /** Group value wrapping a primitive {@code long}. */
+  static class LongFieldGroupValue extends FieldGroupValue {
+
+    private long value;
+
+    LongFieldGroupValue(long value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Long.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public long getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(long value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code LongFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((LongFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new LongFieldGroupValue(value);
+    }
+
+    /** Returns the decimal string form of the wrapped value. */
+    public String toReadableString() {
+      return Long.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      return o != null && o.getClass() == getClass() && ((LongFieldGroupValue) o).value == value;
+    }
+
+    @Override
+    public int hashCode() {
+      // Fold the upper 32 bits into the lower 32 bits.
+      final long folded = value ^ (value >>> 32);
+      return (int) folded;
+    }
+
+  }
+
+  /** Group value wrapping a primitive {@code float}. */
+  static class FloatFieldGroupValue extends FieldGroupValue {
+
+    private float value;
+
+    FloatFieldGroupValue(float value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Float.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public float getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(float value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code FloatFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((FloatFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new FloatFieldGroupValue(value);
+    }
+
+    /** Returns the string form produced by {@link Float#toString(float)}. */
+    public String toReadableString() {
+      return Float.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      if (o == null || o.getClass() != getClass()) {
+        return false;
+      }
+      // Float.compare is used instead of ==, so NaN equals NaN here.
+      return Float.compare(((FloatFieldGroupValue) o).value, value) == 0;
+    }
+
+    @Override
+    public int hashCode() {
+      if (value == 0.0f) {
+        return 0;
+      }
+      return Float.floatToIntBits(value);
+    }
+
+  }
+
+  /** Group value wrapping a primitive {@code double}. */
+  static class DoubleFieldGroupValue extends FieldGroupValue {
+
+    private double value;
+
+    DoubleFieldGroupValue(double value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped value in boxed form. */
+    public Object getValue() {
+      return Double.valueOf(value);
+    }
+
+    /** Returns the wrapped value as a primitive. */
+    public double getConcreteValue() {
+      return value;
+    }
+
+    /** Overwrites the wrapped value. */
+    public void setConcreteValue(double value) {
+      this.value = value;
+    }
+
+    /** Takes over the value of {@code other}, which is cast to {@code DoubleFieldGroupValue}. */
+    public void setValue(GroupValue other) {
+      this.value = ((DoubleFieldGroupValue) other).value;
+    }
+
+    /** Returns a new instance wrapping the current value. */
+    public GroupValue duplicate() {
+      return new DoubleFieldGroupValue(value);
+    }
+
+    /** Returns the string form produced by {@link Double#toString(double)}. */
+    public String toReadableString() {
+      return Double.toString(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      if (o == null || o.getClass() != getClass()) {
+        return false;
+      }
+      // Double.compare is used instead of ==, so NaN equals NaN here.
+      return Double.compare(((DoubleFieldGroupValue) o).value, value) == 0;
+    }
+
+    @Override
+    public int hashCode() {
+      final long bits = (value == 0.0d) ? 0L : Double.doubleToLongBits(value);
+      return (int) (bits ^ (bits >>> 32));
+    }
+  }
+
+  /** Group value wrapping a {@link BytesRef}; the reference itself may be {@code null}. */
+  static class BytesRefFieldGroupValue extends FieldGroupValue {
+
+    private BytesRef value;
+
+    BytesRefFieldGroupValue(BytesRef value) {
+      this.value = value;
+    }
+
+    /** Returns the wrapped {@link BytesRef} instance (not a copy). */
+    public Object getValue() {
+      return value;
+    }
+
+    /** Returns the wrapped {@link BytesRef} instance (not a copy). */
+    public BytesRef getConcreteValue() {
+      return value;
+    }
+
+    /**
+     * Copies the value of {@code other}, which is cast to
+     * {@code BytesRefFieldGroupValue}, into this instance's own {@link BytesRef}
+     * via {@code BytesRef.copy}.
+     */
+    public void setValue(GroupValue other) {
+      value.copy(((BytesRefFieldGroupValue) other).value);
+    }
+
+    /** Returns a new instance wrapping a new {@link BytesRef} built from the current one. */
+    public GroupValue duplicate() {
+      final BytesRef copy = new BytesRef(value);
+      return new BytesRefFieldGroupValue(copy);
+    }
+
+    /** Returns the wrapped bytes decoded with {@code utf8ToString()}. */
+    public String toReadableString() {
+      return value.utf8ToString();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (o == this) {
+        return true;
+      }
+      if (o == null || o.getClass() != getClass()) {
+        return false;
+      }
+      final BytesRef otherValue = ((BytesRefFieldGroupValue) o).value;
+      return value == null ? otherValue == null : value.equals(otherValue);
+    }
+
+    @Override
+    public int hashCode() {
+      return value == null ? 0 : value.hashCode();
+    }
+  }
+
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/research/ResearchFirstPassGroupingCollector.java (revision )
@@ -0,0 +1,348 @@
+package org.apache.lucene.search.grouping.research;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.search.*; +import org.apache.lucene.search.grouping.infrastructure.GroupSpecification; +import org.apache.lucene.search.grouping.infrastructure.GroupValue; +import org.apache.lucene.search.grouping.infrastructure.GroupValueSource; + +import java.io.IOException; +import java.util.*; + +/** FirstPassGroupingCollector is the first of two passes necessary + * to collect grouped hits. This pass gathers the top N sorted + * groups. + * + *

See {@link org.apache.lucene.search.grouping} for more + * details including a full code example.

+ * + * @lucene.experimental + */ + +public class ResearchFirstPassGroupingCollector extends Collector { + + private final Sort groupSort; + private final FieldComparator[] comparators; + private final int[] reversed; + private final int topNGroups; + private final HashMap groupMap; + private final GroupValue scratchValue; + private final int compIDXEnd; + + // Set once we reach topNGroups unique groups: + private TreeSet orderedGroups; + private int docBase; + private int spareSlot; + + private GroupValueSource groupValueSource; + + public ResearchFirstPassGroupingCollector(GroupSpecification specification, Sort groupSort, int topNGroups) throws IOException { + if (topNGroups < 1) { + throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")"); + } + + groupValueSource = specification.createGroupValueSource(); + scratchValue = groupValueSource.getScratchValue(); + // TODO: allow null groupSort to mean "by relevance", + // and specialize it? + this.groupSort = groupSort; + + this.topNGroups = topNGroups; + + final SortField[] sortFields = groupSort.getSort(); + comparators = new FieldComparator[sortFields.length]; + compIDXEnd = comparators.length - 1; + reversed = new int[sortFields.length]; + for (int i = 0; i < sortFields.length; i++) { + final SortField sortField = sortFields[i]; + + // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot): + comparators[i] = sortField.getComparator(topNGroups + 1, i); + reversed[i] = sortField.getReverse() ? -1 : 1; + } + + spareSlot = topNGroups; + groupMap = new HashMap(topNGroups); + } + + /** Returns top groups, starting from offset. This may + * return null, if no groups were collected, or if the + * number of unique groups collected is <= offset. 
*/ + public Collection getTopGroups(int groupOffset, boolean fillFields) { + + //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size()); + + if (groupOffset < 0) { + throw new IllegalArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")"); + } + + if (groupMap.size() <= groupOffset) { + return null; + } + + if (orderedGroups == null) { + buildSortedSet(); + } + + final Collection result = new ArrayList(); + int upto = 0; + final int sortFieldCount = groupSort.getSort().length; + for(CollectedSearchGroup group : orderedGroups) { + if (upto++ < groupOffset) { + continue; + } + //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString())); + ResearchSearchGroup searchGroup = new ResearchSearchGroup(); + searchGroup.groupValue = group.groupValue; + if (fillFields) { + searchGroup.sortValues = new Comparable[sortFieldCount]; + for(int sortFieldIDX=0;sortFieldIDX 0) { + // Definitely competitive; set remaining comparators: + for (int compIDX2=compIDX+1; compIDX2 comparator = new Comparator() { + public int compare(CollectedSearchGroup o1, CollectedSearchGroup o2) { + for (int compIDX = 0;; compIDX++) { + FieldComparator fc = comparators[compIDX]; + final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot); + if (c != 0) { + return c; + } else if (compIDX == compIDXEnd) { + return o1.topDoc - o2.topDoc; + } + } + } + }; + + orderedGroups = new TreeSet(comparator); + orderedGroups.addAll(groupMap.values()); + assert orderedGroups.size() > 0; + + for (FieldComparator fc : comparators) { + fc.setBottom(orderedGroups.last().comparatorSlot); + } + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return false; + } + + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + docBase = readerContext.docBase; + groupValueSource.prepareForNextSegment(readerContext); + + 
for (int i=0; i
+ * Implementation detail: an int hash set (SentinelIntSet)
+ * is used to detect if a group is already added to the
+ * total count. For each segment the int set is cleared and filled
+ * with previous counted groups that occur in the new
+ * segment.
+ *
+ * @lucene.experimental
+ */
+public class ResearchAllGroupsCollector extends Collector {
+
+  private final GroupValueSource groupValueSource;
+  private final GroupHolder groupHolder;
+
+  /**
+   * Creates a collector that reads group values from the given source and
+   * records them in the given holder.
+   *
+   * @param groupValueSource supplies the group value of each collected document
+   * @param groupHolder stores the group values that were collected
+   */
+  public ResearchAllGroupsCollector(GroupValueSource groupValueSource, GroupHolder groupHolder) {
+    this.groupValueSource = groupValueSource;
+    this.groupHolder = groupHolder;
+  }
+
+  /** Does nothing; scores are not used by this collector. */
+  public void setScorer(Scorer scorer) throws IOException {
+  }
+
+  /**
+   * Records the group value of the given document, unless the holder reports
+   * that a value already exists for it.
+   */
+  public void collect(int doc) throws IOException {
+    if (!groupHolder.exists(doc)) {
+      // null means "do not reuse an instance": a new group value is created.
+      groupHolder.set(doc, groupValueSource.getValue(doc, null));
+    }
+  }
+
+  /**
+   * Returns the total number of groups for the executed search, as reported by
+   * the group holder.
+   * This is a convenience method. The following code snippet has the same effect:
+   * {@code getGroups().size()}
+   *
+   * @return The total number of groups for the executed search
+   */
+  public int getGroupCount() {
+    return groupHolder.getHeldGroupsSize();
+  }
+
+  /**
+   * Returns the group values.
+   *
+   * This is an unordered collection of group values, taken directly from the
+   * group holder.
+   *
+   * @return the group values
+   */
+  public Collection getGroups() {
+    return groupHolder.getHeldGroups();
+  }
+
+  /** Lets the value source first, and then the holder, switch to the given segment. */
+  public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+    groupValueSource.prepareForNextSegment(context);
+    groupHolder.prepareForNextSegment(context);
+  }
+
+  /** Always returns {@code true}. */
+  public boolean acceptsDocsOutOfOrder() {
+    return true;
+  }
+}
\ No newline at end of file
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupHolder.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupHolder.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupHolder.java (revision )
@@ -0,0 +1,44 @@
+package org.apache.lucene.search.grouping.infrastructure;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; +import java.util.List; + +/** + * Stores the group values that were recorded for documents while a search is collected. + * ResearchAllGroupsCollector calls {@link #prepareForNextSegment} from setNextReader and + * then uses {@link #exists} and {@link #set} with each collected document id. + */ +public interface GroupHolder { + + /** Returns whether a group value has been recorded for the given document. */ + boolean exists(int doc); + + /** Returns the group value recorded for the given document. */ + GV get(int doc); + + /** Records the given group value for the given document. */ + void set(int doc, GV value); + + /** + * Removes the entry of the given document. + * NOTE(review): the meaning of the return value is not shown in this patch; + * FieldGroupHolder implements this method by throwing UnsupportedOperationException. + */ + boolean remove(int doc); + + /** Returns the group values currently held. */ + List getHeldGroups(); + + /** Returns the number of group values currently held. */ + int getHeldGroupsSize(); + + /** Prepares this holder for documents of the given segment. */ + void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException; + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupSpecification.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupSpecification.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupSpecification.java (revision ) @@ -0,0 +1,29 @@ +package org.apache.lucene.search.grouping.infrastructure; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * Describes what to group on by acting as a factory for the two collaborators a + * grouping collector needs: the source of group values and the holder that stores them. + */ +public interface GroupSpecification { + + /** Creates the source that supplies the group value of a document. */ + GroupValueSource createGroupValueSource(); + + /** + * Creates the holder that stores collected group values. + * + * @param gvf the value source the holder works with; FieldGroupSpecification casts it + * to the source class that its own createGroupValueSource() returns for the same type + * @param initialSize initial size passed on to the created holder + */ + GroupHolder createGroupHolder(GroupValueSource gvf, int initialSize); + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (revision 1103024) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (revision ) @@ -20,7 +20,7 @@ import java.util.Arrays; /** A native int set where one value is reserved to mean "EMPTY" */ -class SentinelIntSet { +public class SentinelIntSet { public int[] keys; public int count; public final int emptyVal; Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupSpecification.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupSpecification.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/FieldGroupSpecification.java (revision ) @@ -0,0 +1,94 @@ +package org.apache.lucene.search.grouping.infrastructure.groupfield; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.grouping.infrastructure.GroupHolder;
+import org.apache.lucene.search.grouping.infrastructure.GroupSpecification;
+import org.apache.lucene.search.grouping.infrastructure.GroupValueSource;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.GroupFieldValueSource.*;
+import org.apache.lucene.search.grouping.infrastructure.groupfield.FieldGroupHolder.*;
+
+/**
+ * {@link GroupSpecification} for grouping on the value of a single field.
+ * The field type decides which value source and which group holder
+ * implementation is created.
+ */
+public class FieldGroupSpecification implements GroupSpecification {
+
+  private final String field;
+  private final FieldType type;
+
+  /**
+   * @param field name of the field to group on, must not be null
+   * @param type type of the field values, must not be null
+   * @throws IllegalArgumentException if one of the arguments is null
+   */
+  public FieldGroupSpecification(String field, FieldType type) {
+    requireArgument(field, "Argument field cannot be null");
+    requireArgument(type, "Argument type cannot be null");
+
+    this.field = field;
+    this.type = type;
+  }
+
+  /** Throws an IllegalArgumentException carrying the given message if the argument is null. */
+  private static void requireArgument(Object argument, String message) {
+    if (argument == null) {
+      throw new IllegalArgumentException(message);
+    }
+  }
+
+  /**
+   * Creates the value source that belongs to the configured field type.
+   *
+   * @throws IllegalStateException if the field type is not supported
+   */
+  public GroupValueSource createGroupValueSource() {
+    final GroupValueSource source;
+    switch (type) {
+      case BYTE:
+        source = new ByteGroupFieldValueSource(field);
+        break;
+      case SHORT:
+        source = new ShortGroupFieldValueSource(field);
+        break;
+      case INT:
+        source = new IntGroupFieldValueSource(field);
+        break;
+      case LONG:
+        source = new LongGroupFieldValueSource(field);
+        break;
+      case FLOAT:
+        source = new FloatGroupFieldValueSource(field);
+        break;
+      case DOUBLE:
+        source = new DoubleGroupFieldValueSource(field);
+        break;
+      case BYTEREF:
+        source = new GroupFieldValueSource.DocTermsIndexFieldValueSource(field);
+        break;
+      default:
+        throw new IllegalStateException("Unsupported type: " + type);
+    }
+    return source;
+  }
+
+  /**
+   * Creates the group holder that belongs to the configured field type.
+   *
+   * @param gvf value source; it is cast to the source class that
+   *        {@link #createGroupValueSource()} returns for the same field type
+   * @param initialSize initial size handed to the created holder
+   * @throws IllegalStateException if the field type is not supported
+   */
+  public GroupHolder createGroupHolder(GroupValueSource gvf, int initialSize) {
+    switch (type) {
+      case BYTE:
+        return new ByteFieldGroupHolder((ByteGroupFieldValueSource) gvf, initialSize);
+      case SHORT:
+        return new ShortFieldGroupHolder((ShortGroupFieldValueSource) gvf, initialSize);
+      case INT:
+        return new IntFieldGroupHolder((IntGroupFieldValueSource) gvf, initialSize);
+      case LONG:
+        return new LongFieldGroupHolder((LongGroupFieldValueSource) gvf, initialSize);
+      case FLOAT:
+        return new FloatFieldGroupHolder((FloatGroupFieldValueSource) gvf, initialSize);
+      case DOUBLE:
+        return new DoubleFieldGroupHolder((DoubleGroupFieldValueSource) gvf, initialSize);
+      case BYTEREF:
+        return new DocTermsIndexFieldGroupHolder((DocTermsIndexFieldValueSource) gvf, initialSize);
+      default:
+        throw new IllegalStateException("Unsupported type: " + type);
+    }
+  }
+}
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupType.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupType.java (revision )
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/GroupType.java (revision )
@@ -0,0 +1,28 @@
+package org.apache.lucene.search.grouping.infrastructure;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Kinds of grouping. + * NOTE(review): this enum is not referenced in the visible part of the patch; presumably + * FIELD means grouping on a field value and FUNCTION grouping on a computed value - confirm. + */ +public enum GroupType { + + FIELD, + FUNCTION + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/GroupFieldValueSource.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/GroupFieldValueSource.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/infrastructure/groupfield/GroupFieldValueSource.java (revision ) @@ -0,0 +1,232 @@ +package org.apache.lucene.search.grouping.infrastructure.groupfield; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.grouping.infrastructure.GroupValue;
+import org.apache.lucene.search.grouping.infrastructure.GroupValueSource;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * Base class for group value sources that read the group value of a document
+ * from a single field through {@link FieldCache}.
+ *
+ * Every nested source follows the same protocol:
+ * {@code prepareForNextSegment} loads the field values of a segment,
+ * {@code getValue} wraps the value of one document (optionally reusing a
+ * caller-supplied instance) and {@code getScratchValue} creates an instance
+ * that can be passed as {@code reuse}.
+ *
+ * Each source produces the {@link FieldGroupValue} subclass that matches its
+ * own primitive type, which is also the class the holder that
+ * FieldGroupSpecification pairs with the source stores.
+ */
+public abstract class GroupFieldValueSource implements GroupValueSource {
+
+  /** Name of the field the group values are read from. */
+  protected final String fieldName;
+
+  protected GroupFieldValueSource(String fieldName) {
+    this.fieldName = fieldName;
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getBytes}. */
+  static class ByteGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private byte[] values;
+
+    ByteGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.ByteFieldGroupValue reuse) {
+      final byte value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.ByteFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the byte values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getBytes(context.reader, fieldName);
+    }
+
+    /** Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}. */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.ByteFieldGroupValue(Byte.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getShorts}. */
+  static class ShortGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private short[] values;
+
+    ShortGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.ShortFieldGroupValue reuse) {
+      final short value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.ShortFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the short values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getShorts(context.reader, fieldName);
+    }
+
+    /** Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}. */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.ShortFieldGroupValue(Short.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getInts}. */
+  static class IntGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private int[] values;
+
+    IntGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.IntFieldGroupValue reuse) {
+      final int value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.IntFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the int values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getInts(context.reader, fieldName);
+    }
+
+    /** Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}. */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.IntFieldGroupValue(Integer.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getLongs}. */
+  static class LongGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private long[] values;
+
+    LongGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.LongFieldGroupValue reuse) {
+      final long value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.LongFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the long values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getLongs(context.reader, fieldName);
+    }
+
+    /** Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}. */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.LongFieldGroupValue(Long.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getFloats}. */
+  static class FloatGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private float[] values;
+
+    FloatGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.FloatFieldGroupValue reuse) {
+      final float value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.FloatFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the float values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getFloats(context.reader, fieldName);
+    }
+
+    /**
+     * Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}.
+     * NOTE(review): Float.MIN_VALUE is the smallest positive float, not the most
+     * negative one (unlike Integer.MIN_VALUE) - confirm the initial value is irrelevant.
+     */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.FloatFieldGroupValue(Float.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getDoubles}. */
+  static class DoubleGroupFieldValueSource extends GroupFieldValueSource {
+
+    // Values of the current segment, indexed by document id.
+    private double[] values;
+
+    DoubleGroupFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc index into the values loaded by the last prepareForNextSegment call
+     * @param reuse instance to overwrite and return, or null to create a new one
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.DoubleFieldGroupValue reuse) {
+      final double value = values[doc];
+      if (reuse == null) {
+        return new FieldGroupValue.DoubleFieldGroupValue(value);
+      }
+
+      reuse.setConcreteValue(value);
+      return reuse;
+    }
+
+    /** Loads the double values of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getDoubles(context.reader, fieldName);
+    }
+
+    /**
+     * Returns a new instance of the value class that {@code getValue} accepts as {@code reuse}.
+     * NOTE(review): Double.MIN_VALUE is the smallest positive double, not the most
+     * negative one (unlike Long.MIN_VALUE) - confirm the initial value is irrelevant.
+     */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.DoubleFieldGroupValue(Double.MIN_VALUE);
+    }
+  }
+
+  /** Source for fields loaded with {@code FieldCache.getTermsIndex}. */
+  static class DocTermsIndexFieldValueSource extends GroupFieldValueSource {
+
+    // Terms index of the current segment.
+    private FieldCache.DocTermsIndex values;
+
+    DocTermsIndexFieldValueSource(String fieldName) {
+      super(fieldName);
+    }
+
+    /**
+     * Returns the group value of the given document.
+     *
+     * @param doc document to look up in the terms index of the current segment
+     * @param reuse instance whose BytesRef is handed to {@code getTerm} to be
+     *        filled, or null to create a new value around a new BytesRef
+     */
+    public GroupValue getValue(int doc, FieldGroupValue.BytesRefFieldGroupValue reuse) {
+      if (reuse == null) {
+        return new FieldGroupValue.BytesRefFieldGroupValue(values.getTerm(doc, new BytesRef()));
+      }
+
+      values.getTerm(doc, reuse.getConcreteValue());
+      return reuse;
+    }
+
+    /** Returns the ordinal the terms index of the current segment reports for the document. */
+    public int getValueOrd(int doc) {
+      return values.getOrd(doc);
+    }
+
+    /** Loads the terms index of the given segment. */
+    public void prepareForNextSegment(IndexReader.AtomicReaderContext context) throws IOException {
+      values = FieldCache.DEFAULT.getTermsIndex(context.reader, fieldName);
+    }
+
+    /** Returns a new value wrapping a new, empty BytesRef. */
+    public GroupValue getScratchValue() {
+      return new FieldGroupValue.BytesRefFieldGroupValue(new BytesRef());
+    }
+  }
+
+}