Index: modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java (revision 1299435) +++ modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java (revision ) @@ -26,6 +26,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.grouping.dv.DVGroupFacetCollector; import org.apache.lucene.search.grouping.term.TermGroupFacetCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -47,47 +48,47 @@ dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); - boolean canUseIDV = false;// Enable later... 
!"Lucene3x".equals(w.w.getConfig().getCodec().getName()); + boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName()); + boolean useDv = canUseDV && random.nextBoolean(); // 0 Document doc = new Document(); - addGroupField(doc, groupField, "a", canUseIDV); - doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "a", canUseDV); + addField(doc, "airport", "ams", canUseDV); + addField(doc, "duration", "5", canUseDV); w.addDocument(doc); // 1 doc = new Document(); - addGroupField(doc, groupField, "a", canUseIDV); - doc.add(new Field("airport", "dus", TextField.TYPE_STORED)); - doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "a", canUseDV); + addField(doc, "airport", "dus", canUseDV); + addField(doc, "duration", "10", canUseDV); w.addDocument(doc); // 2 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "airport", "ams", canUseDV); + addField(doc, "duration", "10", canUseDV); w.addDocument(doc); w.commit(); // To ensure a second segment // 3 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "airport", "ams", canUseDV); + addField(doc, "duration", "5", canUseDV); w.addDocument(doc); // 4 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "airport", "ams", canUseDV); + addField(doc, 
"duration", "5", canUseDV); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); - TermGroupFacetCollector groupedAirportFacetCollector = - TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128); + AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector); TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false); assertEquals(3, airportResult.getTotalCount()); @@ -101,8 +102,7 @@ assertEquals(1, entries.get(1).getCount()); - TermGroupFacetCollector groupedDurationFacetCollector = - TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128); + AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector); TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false); assertEquals(4, durationResult.getTotalCount()); @@ -117,34 +117,34 @@ // 5 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "duration", "5", canUseDV); w.addDocument(doc); // 6 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "airport", "bru", canUseDV); + addField(doc, "duration", "10", canUseDV); w.addDocument(doc); // 7 doc = new Document(); - addGroupField(doc, groupField, "b", canUseIDV); - doc.add(new Field("airport", "bru", 
TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "b", canUseDV); + addField(doc, "airport", "bru", canUseDV); + addField(doc, "duration", "15", canUseDV); w.addDocument(doc); // 8 doc = new Document(); - addGroupField(doc, groupField, "a", canUseIDV); - doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "a", canUseDV); + addField(doc, "airport", "bru", canUseDV); + addField(doc, "duration", "10", canUseDV); w.addDocument(doc); indexSearcher.getIndexReader().close(); indexSearcher = new IndexSearcher(w.getReader()); - groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", true, null, 128); + groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector); airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true); assertEquals(5, airportResult.getTotalCount()); @@ -157,7 +157,7 @@ assertEquals("dus", entries.get(1).getValue().utf8ToString()); assertEquals(1, entries.get(1).getCount()); - groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128); + groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector); durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true); assertEquals(5, durationResult.getTotalCount()); @@ -170,21 +170,21 @@ // 9 doc = new Document(); - addGroupField(doc, groupField, "c", canUseIDV); - doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "c", canUseDV); + addField(doc, "airport", "bru", 
canUseDV); + addField(doc, "duration", "15", canUseDV); w.addDocument(doc); // 10 doc = new Document(); - addGroupField(doc, groupField, "c", canUseIDV); - doc.add(new Field("airport", "dus", TextField.TYPE_UNSTORED)); - doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED)); + addField(doc, groupField, "c", canUseDV); + addField(doc, "airport", "dus", canUseDV); + addField(doc, "duration", "10", canUseDV); w.addDocument(doc); indexSearcher.getIndexReader().close(); indexSearcher = new IndexSearcher(w.getReader()); - groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128); + groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector); airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false); assertEquals(7, airportResult.getTotalCount()); @@ -199,7 +199,7 @@ assertEquals("dus", entries.get(2).getValue().utf8ToString()); assertEquals(2, entries.get(2).getCount()); - groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, new BytesRef("1"), 128); + groupedDurationFacetCollector = createRandomCollector(groupField, "duration", "1", false, useDv); indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector); durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true); assertEquals(5, durationResult.getTotalCount()); @@ -217,10 +217,10 @@ dir.close(); } - private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) { - doc.add(new Field(groupField, value, TextField.TYPE_UNSTORED)); + private void addField(Document doc, String field, String value, boolean canUseIDV) { + doc.add(new Field(field, value, StringField.TYPE_UNSTORED)); if (canUseIDV) { - doc.add(new DocValuesField(groupField, new BytesRef(value), 
DocValues.Type.BYTES_VAR_SORTED)); + doc.add(new DocValuesField(field, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED)); } } @@ -232,6 +232,7 @@ final IndexSearcher searcher = newSearcher(context.indexReader); for (int searchIter = 0; searchIter < 100; searchIter++) { + boolean useDv = context.useDV && random.nextBoolean(); String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)]; int limit = random.nextInt(context.facetValues.size()); int offset = random.nextInt(context.facetValues.size() - limit); @@ -254,7 +255,7 @@ } GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix); - TermGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument); + AbstractGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument, useDv); searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector); TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount); @@ -357,20 +358,38 @@ new MockAnalyzer(random) ) ); + boolean canUseDV = !"Lucene3x".equals(writer.w.getConfig().getCodec().getName()); + boolean useDv = canUseDV && random.nextBoolean(); Document doc = new Document(); Document docNoGroup = new Document(); Document docNoFacet = new Document(); Document docNoGroupNoFacet = new Document(); Field group = newField("group", "", StringField.TYPE_UNSTORED); + DocValuesField groupDc = new DocValuesField("group", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED); + if (useDv) { + doc.add(groupDc); + docNoFacet.add(groupDc); + } doc.add(group); docNoFacet.add(group); - Field[] facetFields = multipleFacetValuesPerDocument? 
new Field[2 + random.nextInt(6)] : new Field[1]; + Field[] facetFields; + if (useDv) { + facetFields = new Field[2]; + facetFields[0] = newField("facet", "", StringField.TYPE_UNSTORED); + doc.add(facetFields[0]); + docNoGroup.add(facetFields[0]); + facetFields[1] = new DocValuesField("facet", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED); + doc.add(facetFields[1]); + docNoGroup.add(facetFields[1]); + } else { + facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1]; - for (int i = 0; i < facetFields.length; i++) { - facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED); - doc.add(facetFields[i]); - docNoGroup.add(facetFields[i]); - } + for (int i = 0; i < facetFields.length; i++) { + facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED); + doc.add(facetFields[i]); + docNoGroup.add(facetFields[i]); + } + } Field content = newField("content", "", StringField.TYPE_UNSTORED); doc.add(content); docNoGroup.add(content); @@ -412,20 +431,36 @@ List facetVals = new ArrayList(); if (random.nextInt(24) != 18) { + if (useDv) { + String facetValue = facetValues.get(random.nextInt(facetValues.size())); + uniqueFacetValues.add(facetValue); + if (!facetToGroups.containsKey(facetValue)) { + facetToGroups.put(facetValue, new HashSet()); + } + Set groupsInFacet = facetToGroups.get(facetValue); + groupsInFacet.add(groupValue); + if (groupsInFacet.size() > facetWithMostGroups) { + facetWithMostGroups = groupsInFacet.size(); + } + facetFields[0].setStringValue(facetValue); + facetFields[1].setBytesValue(new BytesRef(facetValue)); + facetVals.add(facetValue); + } else { - for (Field facetField : facetFields) { - String facetValue = facetValues.get(random.nextInt(facetValues.size())); - uniqueFacetValues.add(facetValue); - if (!facetToGroups.containsKey(facetValue)) { - facetToGroups.put(facetValue, new HashSet()); - } - Set groupsInFacet = facetToGroups.get(facetValue); - groupsInFacet.add(groupValue); - if 
(groupsInFacet.size() > facetWithMostGroups) { - facetWithMostGroups = groupsInFacet.size(); - } - facetField.setStringValue(facetValue); - facetVals.add(facetValue); - } + for (Field facetField : facetFields) { + String facetValue = facetValues.get(random.nextInt(facetValues.size())); + uniqueFacetValues.add(facetValue); + if (!facetToGroups.containsKey(facetValue)) { + facetToGroups.put(facetValue, new HashSet()); + } + Set groupsInFacet = facetToGroups.get(facetValue); + groupsInFacet.add(groupValue); + if (groupsInFacet.size() > facetWithMostGroups) { + facetWithMostGroups = groupsInFacet.size(); + } + facetField.setStringValue(facetValue); + facetVals.add(facetValue); + } + } } else { uniqueFacetValues.add(null); if (!facetToGroups.containsKey(null)) { @@ -443,6 +478,9 @@ } if (groupValue != null) { + if (useDv) { + groupDc.setBytesValue(new BytesRef(groupValue)); + } group.setStringValue(groupValue); } content.setStringValue(contentStr); @@ -460,7 +498,7 @@ DirectoryReader reader = writer.getReader(); writer.close(); - return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues); + return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv); } private GroupedFacetResult createExpectedFacetResult(String searchTerm, IndexContext context, int offset, int limit, int minCount, final boolean orderByCount, String facetPrefix) { @@ -532,10 +570,15 @@ return new GroupedFacetResult(totalCount, totalMissCount, entriesResult); } - private TermGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) { + private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument, boolean useDv) { BytesRef facetPrefixBR = facetPrefix == null ? 
null : new BytesRef(facetPrefix); + if (useDv) { + return DVGroupFacetCollector.createDvGroupFacetCollector(groupField, DocValues.Type.BYTES_VAR_SORTED, + random.nextBoolean(), facetField, DocValues.Type.BYTES_VAR_SORTED, random.nextBoolean(), facetPrefixBR, random.nextInt(1024)); + } else { - return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024)); - } + return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024)); + } + } private String getFromSet(Set set, int index) { int currentIndex = 0; @@ -558,9 +601,10 @@ final int facetWithMostGroups; final int numGroups; final String[] contentStrings; + final boolean useDV; public IndexContext(Map>> searchTermToFacetGroups, DirectoryReader r, - int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet facetValues) { + int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet facetValues, boolean useDV) { this.searchTermToFacetGroups = searchTermToFacetGroups; this.indexReader = r; this.numDocs = numDocs; @@ -569,6 +613,7 @@ this.numGroups = numGroups; this.contentStrings = contentStrings; this.facetValues = facetValues; + this.useDV = useDV; } } Index: modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== --- modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java (revision ) +++ modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java (revision ) @@ -0,0 +1,351 @@ +package org.apache.lucene.search.grouping.dv; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.SentinelIntSet;
+import org.apache.lucene.util.UnicodeUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on docvalues.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVGroupFacetCollector extends AbstractGroupFacetCollector {
+
+  final DocValues.Type groupDvType;
+  final boolean groupDiskResident;
+  final DocValues.Type facetFieldDvType;
+  final boolean facetDiskResident;
+
+  final List<GroupedFacetHit> groupedFacetHits; // every unique (group value, facet value) pair collected so far
+  final SentinelIntSet segmentGroupedFacetHits; // (groupOrd * facetValueCount + facetOrd) keys seen in the current segment
+  final List<SegmentResult> segmentResults;     // results of the segments that have been completely collected
+
+  int[] segmentFacetCounts;
+  int segmentTotalCount;
+  int startFacetOrd;
+  int endFacetOrd;
+
+  /**
+   * Factory method for creating the right implementation based on the doc values types of the group field and
+   * the facet field. Only the sorted bytes types (BYTES_VAR_SORTED and BYTES_FIXED_SORTED) are supported.
+   *
+   * @param groupField The group field
+   * @param groupDvType The doc values type of the group field
+   * @param groupDiskResident Whether the direct source of the group doc values is used instead of the default source
+   * @param facetField The facet field
+   * @param facetDvType The doc values type of the facet field
+   * @param facetDiskResident Whether the direct source of the facet doc values is used instead of the default source
+   * @param facetPrefix The facet prefix a facet entry should start with to be included; <code>null</code> includes all
+   * @param initialSize The initial allocation size of the internal int set and group facet list which should roughly
+   *                    match the total number of expected unique groups. Be aware that the heap usage is 4 bytes * initialSize.
+   * @return DVGroupFacetCollector implementation
+   * @throws IllegalArgumentException If the group or the facet doc values type isn't one of the sorted bytes types
+   */
+  public static DVGroupFacetCollector createDvGroupFacetCollector(String groupField,
+                                                                  DocValues.Type groupDvType,
+                                                                  boolean groupDiskResident,
+                                                                  String facetField,
+                                                                  DocValues.Type facetDvType,
+                                                                  boolean facetDiskResident,
+                                                                  BytesRef facetPrefix,
+                                                                  int initialSize) {
+    switch (groupDvType) {
+      case VAR_INTS:
+      case FIXED_INTS_8:
+      case FIXED_INTS_16:
+      case FIXED_INTS_32:
+      case FIXED_INTS_64:
+      case FLOAT_32:
+      case FLOAT_64:
+      case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_DEREF:
+      case BYTES_VAR_STRAIGHT:
+      case BYTES_VAR_DEREF:
+        throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+      case BYTES_VAR_SORTED:
+      case BYTES_FIXED_SORTED:
+        return GroupSortedBR.createGroupSortedFacetCollector(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+      default:
+        throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+    }
+  }
+
+  DVGroupFacetCollector(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+    super(groupField, facetField, facetPrefix);
+    this.groupDvType = groupDvType;
+    this.groupDiskResident = groupDiskResident;
+    this.facetFieldDvType = facetFieldDvType;
+    this.facetDiskResident = facetDiskResident;
+    groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
+    segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
+    segmentResults = new ArrayList<SegmentResult>();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
+    if (segmentFacetCounts != null) { // the last collected segment hasn't been turned into a result yet
+      segmentResults.add(createSegmentResult());
+      segmentFacetCounts = null; // reset
+    }
+
+    int totalCount = 0;
+    int missingCount = 0;
+    SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
+    for (SegmentResult segmentResult : segmentResults) {
+      missingCount += segmentResult.missing;
+      if (segmentResult.mergePos >= segmentResult.maxTermPos) { // no facet values to merge from this segment
+        continue;
+      }
+      totalCount += segmentResult.total;
+      segmentResult.initializeForMerge();
+      segments.add(segmentResult);
+    }
+
+    GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
+    while (segments.size() > 0) {
+      SegmentResult segmentResult = segments.top();
+      BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm); // mergeTerm is a reused spare
+      int count = 0;
+
+      do {
+        count += segmentResult.counts[segmentResult.mergePos++];
+        if (segmentResult.mergePos < segmentResult.maxTermPos) {
+          segmentResult.nextTerm();
+          segmentResult = segments.updateTop();
+        } else { // this segment is exhausted
+          segments.pop();
+          segmentResult = segments.top();
+          if (segmentResult == null) {
+            break;
+          }
+        }
+      } while (currentFacetValue.equals(segmentResult.mergeTerm)); // same value on top again: keep summing
+      facetResult.addFacetCount(currentFacetValue, count);
+    }
+    return facetResult;
+  }
+
+  protected abstract SegmentResult createSegmentResult(); // wraps the counters of the current segment
+
+  static abstract class GroupSortedBR extends DVGroupFacetCollector {
+
+    final BytesRef facetSpare = new BytesRef();
+    final BytesRef groupSpare = new BytesRef();
+    DocValues.SortedSource groupFieldSource;
+
+    GroupSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+      super(groupField, groupDvType, groupDiskResident, facetField, facetFieldDvType, facetDiskResident, facetPrefix, initialSize);
+    }
+
+    static DVGroupFacetCollector createGroupSortedFacetCollector(String groupField,
+                                                                 DocValues.Type groupDvType,
+                                                                 boolean groupDiskResident,
+                                                                 String facetField,
+                                                                 DocValues.Type facetDvType,
+                                                                 boolean facetDiskResident,
+                                                                 BytesRef facetPrefix,
+                                                                 int initialSize) {
+      switch (facetDvType) {
+        case VAR_INTS:
+        case FIXED_INTS_8:
+        case FIXED_INTS_16:
+        case FIXED_INTS_32:
+        case FIXED_INTS_64:
+        case FLOAT_32:
+        case FLOAT_64:
+        case BYTES_FIXED_STRAIGHT:
+        case BYTES_FIXED_DEREF:
+        case BYTES_VAR_STRAIGHT:
+        case BYTES_VAR_DEREF:
+          throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+        case BYTES_VAR_SORTED:
+        case BYTES_FIXED_SORTED:
+          return new FacetSortedBR(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+        default:
+          throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+      }
+    }
+
+
+    static class FacetSortedBR extends GroupSortedBR {
+
+      private DocValues.SortedSource facetFieldSource;
+
+      FacetSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetDvType, boolean diskResident, BytesRef facetPrefix, int initialSize) {
+        super(groupField, groupDvType, groupDiskResident, facetField, facetDvType, diskResident, facetPrefix, initialSize);
+      }
+
+      public void collect(int doc) throws IOException {
+        int facetOrd = facetFieldSource.ord(doc);
+        if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) { // outside the ord range selected by the prefix
+          return;
+        }
+
+        int groupOrd = groupFieldSource.ord(doc);
+        int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd; // NOTE(review): int product may overflow
+        if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) { // this group already counted for this facet value
+          return;
+        }
+
+        segmentTotalCount++;
+        segmentFacetCounts[facetOrd]++;
+
+        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
+        groupedFacetHits.add(
+            new GroupedFacetHit(
+                groupFieldSource.getByOrd(groupOrd, new BytesRef()),
+                facetFieldSource.getByOrd(facetOrd, new BytesRef())
+            )
+        );
+      }
+
+      public void setNextReader(AtomicReaderContext context) throws IOException {
+        if (segmentFacetCounts != null) { // keep the counts of the segment that was collected before this one
+          segmentResults.add(createSegmentResult());
+        }
+
+        groupFieldSource = getDocValuesSortedSource(groupField, groupDvType, groupDiskResident, context.reader());
+        facetFieldSource = getDocValuesSortedSource(facetField, facetFieldDvType, facetDiskResident, context.reader());
+        segmentFacetCounts = new int[facetFieldSource.getValueCount()];
+        segmentTotalCount = 0;
+
+        segmentGroupedFacetHits.clear(); // re-seeded below with earlier hits, translated to the ords of this segment
+        for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
+          int facetOrd = facetFieldSource.getOrdByValue(groupedFacetHit.facetValue, facetSpare);
+          if (facetOrd < 0) { // facet value doesn't occur in this segment
+            continue;
+          }
+
+          int groupOrd = groupFieldSource.getOrdByValue(groupedFacetHit.groupValue, groupSpare);
+          if (groupOrd < 0) { // group value doesn't occur in this segment
+            continue;
+          }
+
+          int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
+          segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
+        }
+
+        if (facetPrefix != null) {
+          startFacetOrd = facetFieldSource.getOrdByValue(facetPrefix, facetSpare);
+          if (startFacetOrd < 0) {
+            // Points to the ord one higher than facetPrefix
+            startFacetOrd = -startFacetOrd - 1;
+          }
+          BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
+          facetEndPrefix.append(UnicodeUtil.BIG_TERM);
+          endFacetOrd = facetFieldSource.getOrdByValue(facetEndPrefix, facetSpare);
+          endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
+        } else {
+          startFacetOrd = 0;
+          endFacetOrd = facetFieldSource.getValueCount();
+        }
+      }
+
+      protected SegmentResult createSegmentResult() {
+        return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldSource, startFacetOrd, endFacetOrd);
+      }
+
+      private DocValues.SortedSource getDocValuesSortedSource(String field, DocValues.Type dvType, boolean diskResident, AtomicReader reader) throws IOException {
+        DocValues dv = reader.docValues(field);
+        DocValues.Source dvSource;
+        if (dv != null) {
+          dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
+        } else { // the reader has no doc values for this field: fall back to the default source
+          dvSource = DocValues.getDefaultSortedSource(dvType, reader.maxDoc());
+        }
+        return dvSource.asSortedSource();
+      }
+
+    }
+  }
+
+}
+
+class SegmentResult {
+
+  final int[] counts;
+  final int total;
+  final int missing;
+
+  final int maxTermPos;
+  final DocValues.SortedSource facetFieldSource;
+  final BytesRef spare = new BytesRef();
+
+  // Used for merging the segment results
+  BytesRef mergeTerm;
+  int mergePos;
+
+  SegmentResult(int[] counts, int total, DocValues.SortedSource facetFieldSource, int startFacetOrd, int endFacetOrd) {
+    this.counts = counts;
+    if (startFacetOrd == 0 && facetFieldSource.getByOrd(startFacetOrd, spare).length == 0) { // ord 0 is the empty value
+      this.missing = counts[0];
+      this.total = total - missing;
+      this.mergePos = 1; // the empty value is reported as missing instead of being merged
+    } else {
+      this.missing = 0;
+      this.total = total;
+      this.mergePos = startFacetOrd;
+    }
+    this.facetFieldSource = facetFieldSource;
+    this.maxTermPos = endFacetOrd;
+  }
+
+  void initializeForMerge() throws IOException {
+    mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+  }
+
+  void nextTerm() throws IOException {
+    mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+  }
+
+}
+
+class GroupedFacetHit {
+
+  final BytesRef groupValue;
+  final BytesRef facetValue;
+
+  GroupedFacetHit(BytesRef groupValue, BytesRef facetValue) {
+    this.groupValue = groupValue;
+    this.facetValue = facetValue;
+  }
+}
+
+class SegmentResultPriorityQueue extends PriorityQueue<SegmentResult> {
+
+  SegmentResultPriorityQueue(int maxSize) {
+    super(maxSize);
+  }
+
+  protected boolean lessThan(SegmentResult a, SegmentResult b) {
+    return a.mergeTerm.compareTo(b.mergeTerm) < 0; // smallest facet value on top
+  }
+}