Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision 1187902) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (revision ) @@ -21,22 +21,21 @@ import java.util.*; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.*; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.SlowMultiReaderWrapper; import org.apache.lucene.index.Term; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; import org.apache.lucene.search.*; import org.apache.lucene.search.grouping.function.FunctionAllGroupsCollector; import org.apache.lucene.search.grouping.function.FunctionFirstPassGroupingCollector; import org.apache.lucene.search.grouping.function.FunctionSecondPassGroupingCollector; +import org.apache.lucene.search.grouping.idv.IDVFirstPassGroupingCollector; +import org.apache.lucene.search.grouping.idv.IDVSecondPassGroupingCollector; import org.apache.lucene.search.grouping.term.TermAllGroupsCollector; import org.apache.lucene.search.grouping.term.TermFirstPassGroupingCollector; import org.apache.lucene.search.grouping.term.TermSecondPassGroupingCollector; @@ -62,51 +61,52 @@ FieldType customType = new FieldType(); customType.setStored(true); - + Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + boolean canUseIDV = !"PreFlex".equals(w.w.getConfig().getCodecProvider().getFieldCodec(groupField)); // 0 Document doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "random text", TextField.TYPE_STORED)); doc.add(new Field("id", "1", customType)); w.addDocument(doc); // 1 doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "some more random text", TextField.TYPE_STORED)); doc.add(new Field("id", "2", customType)); w.addDocument(doc); // 2 doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "some more random textual data", TextField.TYPE_STORED)); doc.add(new Field("id", "3", customType)); w.addDocument(doc); // 3 doc = new Document(); - doc.add(new Field(groupField, "author2", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author2", canUseIDV); doc.add(new Field("content", "some random text", TextField.TYPE_STORED)); doc.add(new Field("id", "4", customType)); w.addDocument(doc); // 4 doc = new Document(); - doc.add(new Field(groupField, "author3", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author3", 
canUseIDV); doc.add(new Field("content", "some more random text", TextField.TYPE_STORED)); doc.add(new Field("id", "5", customType)); w.addDocument(doc); // 5 doc = new Document(); - doc.add(new Field(groupField, "author3", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author3", canUseIDV); doc.add(new Field("content", "random", TextField.TYPE_STORED)); doc.add(new Field("id", "6", customType)); w.addDocument(doc); @@ -121,7 +121,7 @@ w.close(); final Sort groupSort = Sort.RELEVANCE; - final AbstractFirstPassGroupingCollector c1 = createRandomFirstPassCollector(groupField, groupSort, 10); + final AbstractFirstPassGroupingCollector c1 = createRandomFirstPassCollector(groupField, groupSort, 10, canUseIDV); indexSearcher.search(new TermQuery(new Term("content", "random")), c1); final AbstractSecondPassGroupingCollector c2 = createSecondPassCollector(c1, groupField, groupSort, null, 0, 5, true, false, true); @@ -167,14 +167,31 @@ dir.close(); } - private AbstractFirstPassGroupingCollector createRandomFirstPassCollector(String groupField, Sort groupSort, int topDocs) throws IOException { - if (random.nextBoolean()) { + private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { + doc.add(new Field(groupField, value, TextField.TYPE_STORED)); + if (canUseIDV) { + IndexDocValuesField valuesField = new IndexDocValuesField(groupField); + valuesField.setBytes(new BytesRef(value), ValueType.BYTES_VAR_SORTED); + doc.add(valuesField); + } + } + + private AbstractFirstPassGroupingCollector createRandomFirstPassCollector(String groupField, Sort groupSort, int topDocs, boolean canUseIDV) throws IOException { + AbstractFirstPassGroupingCollector selected; + if (random.nextBoolean() && canUseIDV) { + boolean diskResident = random.nextBoolean(); + selected = IDVFirstPassGroupingCollector.create(groupSort, topDocs, groupField, ValueType.BYTES_VAR_SORTED, diskResident); + } else if (random.nextBoolean()) { ValueSource vs = new BytesRefFieldSource(groupField); - return new FunctionFirstPassGroupingCollector(vs, new HashMap(), groupSort, topDocs); + selected = new FunctionFirstPassGroupingCollector(vs, new HashMap(), groupSort, topDocs); } else { - return new TermFirstPassGroupingCollector(groupField, groupSort, topDocs); + selected = new TermFirstPassGroupingCollector(groupField, groupSort, topDocs); } + if (VERBOSE) { + System.out.println("Selected implementation: " + selected.getClass().getName()); - } + } + return selected; + } private AbstractSecondPassGroupingCollector createSecondPassCollector(AbstractFirstPassGroupingCollector firstPassGroupingCollector, String groupField, @@ -186,8 +203,13 @@ boolean getMaxScores, boolean fillSortFields) throws IOException { - if (firstPassGroupingCollector.getClass().isAssignableFrom(TermFirstPassGroupingCollector.class)) { + if (IDVFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) { + boolean diskResident = random.nextBoolean(); @SuppressWarnings("unchecked") + Collection searchGroups = firstPassGroupingCollector.getTopGroups(groupOffset, fillSortFields); + return IDVSecondPassGroupingCollector.create(groupField, diskResident, ValueType.BYTES_VAR_SORTED, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + } else if (TermFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) { + @SuppressWarnings("unchecked") Collection> searchGroups = firstPassGroupingCollector.getTopGroups(groupOffset, 
fillSortFields); return new TermSecondPassGroupingCollector(groupField, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup , getScores, getMaxScores, fillSortFields); } else { @@ -247,6 +269,8 @@ return; } else if (group.groupValue.getClass().isAssignableFrom(MutableValueStr.class)) { return; + } else if (((BytesRef) group.groupValue).length == 0) { + return; } fail(); } @@ -263,9 +287,9 @@ } private Collection> getSearchGroups(AbstractFirstPassGroupingCollector c, int groupOffset, boolean fillFields) { - if (c.getClass().isAssignableFrom(TermFirstPassGroupingCollector.class)) { + if (TermFirstPassGroupingCollector.class.isAssignableFrom(c.getClass())) { return ((TermFirstPassGroupingCollector) c).getTopGroups(groupOffset, fillFields); - } else if (c.getClass().isAssignableFrom(FunctionFirstPassGroupingCollector.class)) { + } else if (FunctionFirstPassGroupingCollector.class.isAssignableFrom(c.getClass())) { Collection> mutableValueGroups = ((FunctionFirstPassGroupingCollector) c).getTopGroups(groupOffset, fillFields); if (mutableValueGroups == null) { return null; @@ -279,6 +303,10 @@ groups.add(sg); } return groups; + } else if (IDVFirstPassGroupingCollector.class.isAssignableFrom(c.getClass())) { + @SuppressWarnings("unchecked") + Collection> topGroups = ((IDVFirstPassGroupingCollector) c).getTopGroups(groupOffset, fillFields); + return topGroups; } fail(); return null; @@ -296,6 +324,8 @@ groups.add(new GroupDocs(mvalGd.maxScore, mvalGd.totalHits, mvalGd.scoreDocs, groupValue, mvalGd.groupSortValues)); } return new TopGroups(mvalTopGroups.groupSort, mvalTopGroups.withinGroupSort, mvalTopGroups.totalHitCount, mvalTopGroups.totalGroupedHitCount, groups.toArray(new GroupDocs[groups.size()])); + } else if (IDVSecondPassGroupingCollector.class.isAssignableFrom(c.getClass())) { + return ((IDVSecondPassGroupingCollector) c).getTopGroups(withinGroupOffset); } fail(); return null; @@ -512,7 +542,7 @@ Collections.shuffle(Arrays.asList(groupDocs), random); final Map> groupMap = new HashMap>(); final List groupValues = new ArrayList(); - + for(GroupDoc groupDoc : groupDocs) { if (!groupMap.containsKey(groupDoc.group)) { groupValues.add(groupDoc.group); @@ -526,6 +556,7 @@ dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + boolean canUseIDV = !"PreFlex".equals(w.w.getConfig().getCodecProvider().getFieldCodec("group")); final List> updateDocs = new ArrayList>(); @@ -542,7 +573,12 @@ docs.add(doc); if (groupValue.group != null) { doc.add(newField("group", groupValue.group.utf8ToString(), StringField.TYPE_UNSTORED)); + if (canUseIDV) { + IndexDocValuesField valuesField = new IndexDocValuesField("group"); + valuesField.setBytes(new BytesRef(groupValue.group.utf8ToString()), ValueType.BYTES_VAR_SORTED); + doc.add(valuesField); - } + } + } doc.add(newField("sort1", groupValue.sort1.utf8ToString(), StringField.TYPE_UNSTORED)); doc.add(newField("sort2", groupValue.sort2.utf8ToString(), StringField.TYPE_UNSTORED)); doc.add(new NumericField("id").setIntValue(groupValue.id)); @@ -588,7 +624,7 @@ subSearchers[0] = new ShardSearcher((IndexReader.AtomicReaderContext) ctx, ctx); } else { final IndexReader.CompositeReaderContext compCTX = (IndexReader.CompositeReaderContext) ctx; - for(int searcherIDX=0;searcherIDX groups = new ArrayList(); for(int i=0;i topGroupsShards = searchShards(s, shards.subSearchers, query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores); + final TopGroups topGroupsShards = searchShards(s, shards.subSearchers, 
query, groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, canUseIDV, preFlex); final AbstractSecondPassGroupingCollector c2; if (topGroups != null) { @@ -948,7 +999,7 @@ System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score); } } - + if (searchIter == 14) { for(int docIDX=0;docIDX>> shardGroups = new ArrayList>>(); List firstPassGroupingCollectors = new ArrayList(); for(int shardIDX=0;shardIDX> topGroups = getSearchGroups(c, 0, true); @@ -1166,11 +1223,11 @@ } assertNotNull(actual); - assertEquals(expected.groups.length, actual.groups.length); - assertEquals(expected.totalHitCount, actual.totalHitCount); - assertEquals(expected.totalGroupedHitCount, actual.totalGroupedHitCount); + assertEquals("expected.groups.length != actual.groups.length", expected.groups.length, actual.groups.length); + assertEquals("expected.totalHitCount != actual.totalHitCount", expected.totalHitCount, actual.totalHitCount); + assertEquals("expected.totalGroupedHitCount != actual.totalGroupedHitCount", expected.totalGroupedHitCount, actual.totalGroupedHitCount); if (expected.totalGroupCount != null && verifyTotalGroupCount) { - assertEquals(expected.totalGroupCount, actual.totalGroupCount); + assertEquals("expected.totalGroupCount != actual.totalGroupCount", expected.totalGroupCount, actual.totalGroupCount); } for(int groupIDX=0;groupIDX extends AbstractAllGroupsCollector { + + private static final int DEFAULT_INITIAL_SIZE = 128; + + /** + * Expert: Constructs a {@link IDVAllGroupsCollector}. + * Selects and constructs the most optimal all groups collector implementation for grouping by {@link IndexDocValues}. + * + * + * @param groupField The field to group by + * @param type The {@link ValueType} which is used to select a concrete implementation. + * @param diskResident Whether the values to group by should be disk resident + * @param initialSize The initial allocation size of the + * internal int set and group list + * which should roughly match the total + * number of expected unique groups. Be aware that the + * heap usage is 4 bytes * initialSize. Not all concrete implementions use this! + * @return the most optimal all groups collector implementation for grouping by {@link IndexDocValues} + */ + public static IDVAllGroupsCollector create(String groupField, ValueType type, boolean diskResident, int initialSize) { + switch (type) { + case VAR_INTS: + case FIXED_INTS_8: + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + return new Lng(groupField, diskResident); + case FLOAT_32: + case FLOAT_64: + return new Dbl(groupField, diskResident); + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_VAR_STRAIGHT: + case BYTES_VAR_DEREF: + return new BR(groupField, diskResident); + case BYTES_VAR_SORTED: + case BYTES_FIXED_SORTED: + return new SortedBR(groupField, diskResident, initialSize); + default: + throw new IllegalArgumentException(String.format("ValueType %s not supported", type)); + } + } + + /** + * Constructs a {@link IDVAllGroupsCollector}. + * Selects and constructs the most optimal all groups collector implementation for grouping by {@link IndexDocValues}. + * If implementations require an initial allocation size then this will be set to 128. + * + * + * @param groupField The field to group by + * @param type The {@link ValueType} which is used to select a concrete implementation. 
+   * @param diskResident Whether the values to group by should be disk resident
+   * @return the most optimal all groups collector implementation for grouping by {@link IndexDocValues}
+   */
+  public static IDVAllGroupsCollector create(String groupField, ValueType type, boolean diskResident) {
+    return create(groupField, type, diskResident, DEFAULT_INITIAL_SIZE);
+  }
+
+  final String groupField;
+  final boolean diskResident;
+  final Collection groups;
+
+  IDVAllGroupsCollector(String groupField, boolean diskResident, Collection groups) {
+    this.groupField = groupField;
+    this.diskResident = diskResident;
+    this.groups = groups;
+  }
+
+  IndexDocValues.Source getSource(IndexReader ir) throws IOException {
+    return diskResident ? ir.perDocValues().docValues(groupField).getDirectSource() :
+        ir.perDocValues().docValues(groupField).getSource();
+  }
+
+  static class Lng extends IDVAllGroupsCollector {
+
+    private IndexDocValues.Source source;
+
+    Lng(String groupField, boolean diskResident) {
+      super(groupField, diskResident, new TreeSet());
+    }
+
+    public void collect(int doc) throws IOException {
+      long value = source.getInt(doc);
+      if (!groups.contains(value)) {
+        groups.add(value);
+      }
+    }
+
+    public Collection getGroups() {
+      return groups;
+    }
+
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = getSource(context.reader);
+    }
+
+  }
+
+  static class Dbl extends IDVAllGroupsCollector {
+
+    private IndexDocValues.Source source;
+
+    Dbl(String groupField, boolean diskResident) {
+      super(groupField, diskResident, new TreeSet());
+    }
+
+    public void collect(int doc) throws IOException {
+      double value = source.getFloat(doc);
+      if (!groups.contains(value)) {
+        groups.add(value);
+      }
+    }
+
+    public Collection getGroups() {
+      return groups;
+    }
+
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = getSource(context.reader);
+    }
+
+  }
+
+  static class BR extends IDVAllGroupsCollector {
+
+    private final BytesRef spare = new BytesRef();
+
+    private IndexDocValues.Source source;
+
+    BR(String groupField, boolean diskResident) {
+      super(groupField, diskResident, new TreeSet());
+    }
+
+    public void collect(int doc) throws IOException {
+      BytesRef value = source.getBytes(doc, spare);
+      if (!groups.contains(value)) {
+        groups.add(new BytesRef(value));
+      }
+    }
+
+    public Collection getGroups() {
+      return groups;
+    }
+
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = getSource(context.reader);
+    }
+
+  }
+
+  static class SortedBR extends IDVAllGroupsCollector {
+
+    private final SentinelIntSet ordSet;
+    private final BytesRef spare = new BytesRef();
+
+    private IndexDocValues.SortedSource source;
+
+    SortedBR(String groupField, boolean diskResident, int initialSize) {
+      super(groupField, diskResident, new ArrayList(initialSize));
+      ordSet = new SentinelIntSet(initialSize, -1);
+    }
+
+    public void collect(int doc) throws IOException {
+      int ord = source.ord(doc);
+      if (!ordSet.exists(ord)) {
+        ordSet.put(ord);
+        BytesRef value = source.getBytes(doc, new BytesRef());
+        groups.add(value);
+      }
+    }
+
+    public Collection getGroups() {
+      return groups;
+    }
+
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = getSource(context.reader).asSortedSource();
+      ordSet.clear();
+      for (BytesRef countedGroup : groups) {
+        int ord = source.getByValue(countedGroup, spare);
+        if (ord >= 0) {
+          ordSet.put(ord);
+        }
+      }
+    }
+  }
+
+}
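Usage sketch (illustrative only, not part of the patch): how the new IDVAllGroupsCollector is meant to be used, mirroring the addGroupField helper and the collector usage in AllGroupsCollectorTest below. The "author" field, the searcher/query variables and the method names are placeholders, and the ValueType is assumed to be BYTES_VAR_SORTED as in the tests.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.grouping.idv.IDVAllGroupsCollector;
import org.apache.lucene.util.BytesRef;

// Indexing side: store the group value both as an inverted field and as doc values,
// in the same way the addGroupField helper in the tests does.
void addGroupField(Document doc, String groupField, String value) {
  doc.add(new Field(groupField, value, TextField.TYPE_STORED));
  IndexDocValuesField valuesField = new IndexDocValuesField(groupField);
  valuesField.setBytes(new BytesRef(value), ValueType.BYTES_VAR_SORTED);
  doc.add(valuesField);
}

// Search side: count the distinct groups matching a query via the doc-values based collector.
int countGroups(IndexSearcher searcher, Query query) throws IOException {
  boolean diskResident = false; // true selects the direct (disk resident) source
  IDVAllGroupsCollector allGroups =
      IDVAllGroupsCollector.create("author", ValueType.BYTES_VAR_SORTED, diskResident);
  searcher.search(query, allGroups);
  return allGroups.getGroupCount();
}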
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVSecondPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVSecondPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVSecondPassGroupingCollector.java (revision ) @@ -0,0 +1,205 @@ +package org.apache.lucene.search.grouping.idv; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.values.IndexDocValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector; +import org.apache.lucene.search.grouping.SearchGroup; +import org.apache.lucene.search.grouping.SentinelIntSet; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Collection; + +/** + * IDV based implementation of {@link AbstractSecondPassGroupingCollector}. + * + * @lucene.experimental + */ +public abstract class IDVSecondPassGroupingCollector extends AbstractSecondPassGroupingCollector { + + /** + * Constructs a {@link IDVSecondPassGroupingCollector}. + * Selects and constructs the most optimal second pass collector implementation for grouping by {@link IndexDocValues}. + * + * + * @param groupField The field to group by + * @param diskResident Whether the values to group by should be disk resident + * @param type The {@link org.apache.lucene.index.values.ValueType} which is used to select a concrete implementation. 
+   * @param searchGroups The groups from the first phase search
+   * @param groupSort The sort used for the groups
+   * @param withinGroupSort The sort used for documents inside a group
+   * @param maxDocsPerGroup The maximum number of documents to collect per group
+   * @param getScores Whether to include scores for the documents inside a group
+   * @param getMaxScores Whether to keep track of the highest score per group
+   * @param fillSortFields Whether to include the sort values
+   * @return the most optimal second pass collector implementation for grouping by {@link IndexDocValues}
+   */
+  @SuppressWarnings("unchecked")
+  public static IDVSecondPassGroupingCollector create(String groupField,
+                                                      boolean diskResident,
+                                                      ValueType type,
+                                                      Collection searchGroups,
+                                                      Sort groupSort,
+                                                      Sort withinGroupSort,
+                                                      int maxDocsPerGroup,
+                                                      boolean getScores,
+                                                      boolean getMaxScores,
+                                                      boolean fillSortFields) throws IOException {
+    switch (type) {
+      case VAR_INTS:
+      case FIXED_INTS_8:
+      case FIXED_INTS_16:
+      case FIXED_INTS_32:
+      case FIXED_INTS_64:
+        // Type erasure b/c otherwise we have inconvertible types...
+        return new Lng(groupField, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+      case FLOAT_32:
+      case FLOAT_64:
+        // Type erasure b/c otherwise we have inconvertible types...
+        return new Dbl(groupField, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+      case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_DEREF:
+      case BYTES_VAR_STRAIGHT:
+      case BYTES_VAR_DEREF:
+        // Type erasure b/c otherwise we have inconvertible types...
+        return new BR(groupField, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+      case BYTES_VAR_SORTED:
+      case BYTES_FIXED_SORTED:
+        // Type erasure b/c otherwise we have inconvertible types...
+        return new SortedBR(groupField, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+      default:
+        throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+    }
+  }
+
+  final String groupField;
+  final boolean diskResident;
+
+  IDVSecondPassGroupingCollector(String groupField, boolean diskResident, Collection> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+    super(searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+    this.groupField = groupField;
+    this.diskResident = diskResident;
+  }
+
+  IndexDocValues.Source getSource(IndexReader ir) throws IOException {
+    return diskResident ?
ir.perDocValues().docValues(groupField).getDirectSource() : + ir.perDocValues().docValues(groupField).getSource(); + } + + + static class Lng extends IDVSecondPassGroupingCollector { + + private IndexDocValues.Source source; + + Lng(String groupField, boolean diskResident, Collection> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { + super(groupField, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + } + + protected SearchGroupDocs retrieveGroup(int doc) throws IOException { + return groupMap.get(source.getInt(doc)); + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + } + } + + static class Dbl extends IDVSecondPassGroupingCollector { + + private IndexDocValues.Source source; + + Dbl(String groupField, boolean diskResident, Collection> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { + super(groupField, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + } + + protected SearchGroupDocs retrieveGroup(int doc) throws IOException { + return groupMap.get(source.getFloat(doc)); + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + } + } + + static class BR extends IDVSecondPassGroupingCollector { + + private IndexDocValues.Source source; + private final BytesRef spare = new BytesRef(); + + BR(String groupField, boolean diskResident, Collection> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { + super(groupField, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + } + + protected SearchGroupDocs retrieveGroup(int doc) throws IOException { + return groupMap.get(source.getBytes(doc, spare)); + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + } + } + + static class SortedBR extends IDVSecondPassGroupingCollector { + + private IndexDocValues.SortedSource source; + private final BytesRef spare = new BytesRef(); + private final SentinelIntSet ordSet; + + @SuppressWarnings("unchecked") + SortedBR(String groupField, boolean diskResident, Collection> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException { + super(groupField, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields); + ordSet = new SentinelIntSet(groupMap.size(), -1); + groupDocs = (SearchGroupDocs[]) new SearchGroupDocs[ordSet.keys.length]; + } + + protected SearchGroupDocs retrieveGroup(int doc) throws IOException { + int slot = ordSet.find(source.ord(doc)); + if (slot >= 0) { + return groupDocs[slot]; + } + + return null; + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { 
+ super.setNextReader(readerContext); + source = getSource(readerContext.reader).asSortedSource(); + + ordSet.clear(); + for (SearchGroupDocs group : groupMap.values()) { + int ord = source.getByValue(group.groupValue, spare); + if (ord >= 0) { + groupDocs[ordSet.put(ord)] = group; + } + } + } + } + +} Index: modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision 1187902) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (revision ) @@ -18,19 +18,20 @@ */ import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.TextField; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.index.values.ValueType; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.grouping.function.FunctionAllGroupsCollector; +import org.apache.lucene.search.grouping.idv.IDVAllGroupsCollector; import org.apache.lucene.search.grouping.term.TermAllGroupsCollector; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import java.io.IOException; @@ -46,27 +47,29 @@ Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( - random, - dir, - newIndexWriterConfig(TEST_VERSION_CURRENT, - new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + random, + dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, + new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + boolean canUseIDV = !"PreFlex".equals(w.w.getConfig().getCodecProvider().getFieldCodec(groupField)); + // 0 Document doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "random text", TextField.TYPE_STORED)); doc.add(new Field("id", "1", customType)); w.addDocument(doc); // 1 doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "some more random text blob", TextField.TYPE_STORED)); doc.add(new Field("id", "2", customType)); w.addDocument(doc); // 2 doc = new Document(); - doc.add(new Field(groupField, "author1", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author1", canUseIDV); doc.add(new Field("content", "some more random textual data", TextField.TYPE_STORED)); doc.add(new Field("id", "3", customType)); w.addDocument(doc); @@ -74,21 +77,21 @@ // 3 doc = new Document(); - doc.add(new Field(groupField, "author2", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author2", canUseIDV); doc.add(new Field("content", "some random text", TextField.TYPE_STORED)); doc.add(new Field("id", "4", customType)); w.addDocument(doc); // 4 doc = new Document(); - doc.add(new Field(groupField, "author3", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author3", canUseIDV); 
doc.add(new Field("content", "some more random text", TextField.TYPE_STORED)); doc.add(new Field("id", "5", customType)); w.addDocument(doc); // 5 doc = new Document(); - doc.add(new Field(groupField, "author3", TextField.TYPE_STORED)); + addGroupField(doc, groupField, "author3", canUseIDV); doc.add(new Field("content", "random blob", TextField.TYPE_STORED)); doc.add(new Field("id", "6", customType)); w.addDocument(doc); @@ -102,15 +105,15 @@ IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); w.close(); - AbstractAllGroupsCollector c1 = createRandomCollector(groupField); + AbstractAllGroupsCollector c1 = createRandomCollector(groupField, canUseIDV); indexSearcher.search(new TermQuery(new Term("content", "random")), c1); assertEquals(4, c1.getGroupCount()); - AbstractAllGroupsCollector c2 = createRandomCollector(groupField); + AbstractAllGroupsCollector c2 = createRandomCollector(groupField, canUseIDV); indexSearcher.search(new TermQuery(new Term("content", "some")), c2); assertEquals(3, c2.getGroupCount()); - AbstractAllGroupsCollector c3 = createRandomCollector(groupField); + AbstractAllGroupsCollector c3 = createRandomCollector(groupField, canUseIDV); indexSearcher.search(new TermQuery(new Term("content", "blob")), c3); assertEquals(2, c3.getGroupCount()); @@ -118,13 +121,32 @@ dir.close(); } - private AbstractAllGroupsCollector createRandomCollector(String groupField) throws IOException { - if (random.nextBoolean()) { - return new TermAllGroupsCollector(groupField); + private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { + doc.add(new Field(groupField, value, TextField.TYPE_STORED)); + if (canUseIDV) { + IndexDocValuesField valuesField = new IndexDocValuesField(groupField); + valuesField.setBytes(new BytesRef(value), ValueType.BYTES_VAR_SORTED); + doc.add(valuesField); + } + } + + private AbstractAllGroupsCollector createRandomCollector(String groupField, boolean canUseIDV) throws IOException { + AbstractAllGroupsCollector selected; + if (random.nextBoolean() && canUseIDV) { + boolean diskResident = random.nextBoolean(); + selected = IDVAllGroupsCollector.create(groupField, ValueType.BYTES_VAR_SORTED, diskResident); + } else if (random.nextBoolean()) { + selected = new TermAllGroupsCollector(groupField); } else { ValueSource vs = new BytesRefFieldSource(groupField); - return new FunctionAllGroupsCollector(vs, new HashMap()); + selected = new FunctionAllGroupsCollector(vs, new HashMap()); } + + if (VERBOSE) { + System.out.println("Selected implementation: " + selected.getClass().getName()); - } + } + return selected; -} + } + +} Index: modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVFirstPassGroupingCollector.java =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVFirstPassGroupingCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/idv/IDVFirstPassGroupingCollector.java (revision ) @@ -0,0 +1,184 @@ +package org.apache.lucene.search.grouping.idv; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.values.IndexDocValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.grouping.AbstractFirstPassGroupingCollector; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +/** + * IDV based Implementations of {@link AbstractFirstPassGroupingCollector}. + */ +public abstract class IDVFirstPassGroupingCollector extends AbstractFirstPassGroupingCollector { + + final String groupField; + final boolean diskResident; + + public static IDVFirstPassGroupingCollector create(Sort groupSort, int topNGroups, String groupField, ValueType type, boolean diskResident) throws IOException { + switch (type) { + case VAR_INTS: + case FIXED_INTS_8: + case FIXED_INTS_16: + case FIXED_INTS_32: + case FIXED_INTS_64: + return new Lng(groupSort, topNGroups, groupField, diskResident); + case FLOAT_32: + case FLOAT_64: + return new Dbl(groupSort, topNGroups, groupField, diskResident); + case BYTES_FIXED_STRAIGHT: + case BYTES_FIXED_DEREF: + case BYTES_VAR_STRAIGHT: + case BYTES_VAR_DEREF: + return new BR(groupSort, topNGroups, groupField, diskResident); + case BYTES_VAR_SORTED: + case BYTES_FIXED_SORTED: + return new SortedBR(groupSort, topNGroups, groupField, diskResident); + default: + throw new IllegalArgumentException(String.format("ValueType %s not supported", type)); + } + } + + IDVFirstPassGroupingCollector(Sort groupSort, int topNGroups, String groupField, boolean diskResident) throws IOException { + super(groupSort, topNGroups); + this.groupField = groupField; + this.diskResident = diskResident; + } + + IndexDocValues.Source getSource(IndexReader ir) throws IOException { + return diskResident ? 
ir.perDocValues().docValues(groupField).getDirectSource() : + ir.perDocValues().docValues(groupField).getSource(); + } + + + static class Lng extends IDVFirstPassGroupingCollector { + + private IndexDocValues.Source source; + + Lng(Sort groupSort, int topNGroups, String groupField, boolean diskResident) throws IOException { + super(groupSort, topNGroups, groupField, diskResident); + } + + protected Long getDocGroupValue(int doc) { + return source.getInt(doc); + } + + protected Long copyDocGroupValue(Long groupValue, Long reuse) { + return groupValue; + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + } + } + + static class Dbl extends IDVFirstPassGroupingCollector { + + private IndexDocValues.Source source; + + Dbl(Sort groupSort, int topNGroups, String groupField, boolean diskResident) throws IOException { + super(groupSort, topNGroups, groupField, diskResident); + } + + protected Double getDocGroupValue(int doc) { + return source.getFloat(doc); + } + + protected Double copyDocGroupValue(Double groupValue, Double reuse) { + return groupValue; + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + } + } + + static class BR extends IDVFirstPassGroupingCollector { + + private IndexDocValues.Source source; + private final BytesRef spare = new BytesRef(); + + BR(Sort groupSort, int topNGroups, String groupField, boolean diskResident) throws IOException { + super(groupSort, topNGroups, groupField, diskResident); + } + + protected BytesRef getDocGroupValue(int doc) { + return source.getBytes(doc, spare); + } + + protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) { + /*if (groupValue == null) { + return null; + } else*/ if (reuse != null) { + reuse.copy(groupValue); + return reuse; + } else { + return new BytesRef(groupValue); + } + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + source = getSource(readerContext.reader); + + } + } + + static class SortedBR extends IDVFirstPassGroupingCollector { + + private IndexDocValues.SortedSource sortedSource; + private final BytesRef spare = new BytesRef(); + + SortedBR(Sort groupSort, int topNGroups, String groupField, boolean diskResident) throws IOException { + super(groupSort, topNGroups, groupField, diskResident); + } + + @Override + protected BytesRef getDocGroupValue(int doc) { + return sortedSource.getBytes(doc, spare); + } + + @Override + protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) { + /*if (groupValue == null) { + return null; + } else */if (reuse != null) { + reuse.copy(groupValue); + return reuse; + } else { + return new BytesRef(groupValue); + } + } + + @Override + public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException { + super.setNextReader(readerContext); + sortedSource = getSource(readerContext.reader).asSortedSource(); + } + } + +}
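Usage sketch (illustrative only, not part of the patch): the two-pass flow that the TestGrouping changes exercise, assuming a "group" field indexed with BYTES_VAR_SORTED doc values as above. The method name, the searcher/query variables and the sort, topN and offset values are placeholders.

import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.grouping.TopGroups;
import org.apache.lucene.search.grouping.idv.IDVFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.idv.IDVSecondPassGroupingCollector;

TopGroups groupByDocValues(IndexSearcher searcher, Query query) throws IOException {
  boolean diskResident = false; // true selects the direct (disk resident) source
  Sort groupSort = Sort.RELEVANCE;

  // First pass: collect the top 10 groups for the query.
  IDVFirstPassGroupingCollector c1 =
      IDVFirstPassGroupingCollector.create(groupSort, 10, "group", ValueType.BYTES_VAR_SORTED, diskResident);
  searcher.search(query, c1);
  Collection searchGroups = c1.getTopGroups(0 /* groupOffset */, true /* fillSortFields */);

  // Second pass: collect the top documents within each of those groups.
  IDVSecondPassGroupingCollector c2 =
      IDVSecondPassGroupingCollector.create("group", diskResident, ValueType.BYTES_VAR_SORTED,
          searchGroups, groupSort, Sort.RELEVANCE, 5 /* maxDocsPerGroup */,
          true /* getScores */, false /* getMaxScores */, true /* fillSortFields */);
  searcher.search(query, c2);
  return c2.getTopGroups(0 /* withinGroupOffset */);
}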