Index: lucene/src/java/org/apache/lucene/index/values/Bytes.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/Bytes.java	(revision 1180123)
+++ lucene/src/java/org/apache/lucene/index/values/Bytes.java	(working copy)
@@ -393,6 +393,7 @@
     protected int lastDocId = -1;
     protected int[] docToEntry;
     protected final BytesRefHash hash;
+    protected boolean optimizePackedForSpeed = false; // read by bitsRequired(long)
 
     protected DerefBytesWriterBase(Directory dir, String id, String codecName,
         int codecVersion, Counter bytesUsed, IOContext context)
@@ -503,7 +504,7 @@
     protected void writeIndex(IndexOutput idxOut, int docCount,
         long maxValue, int[] addresses, int[] toEntry) throws IOException {
       final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
-          PackedInts.bitsRequired(maxValue));
+          bitsRequired(maxValue));
       final int limit = docCount > docToEntry.length ? docToEntry.length
           : docCount;
       assert toEntry.length >= limit -1;
@@ -527,7 +528,7 @@
     protected void writeIndex(IndexOutput idxOut, int docCount,
         long maxValue, long[] addresses, int[] toEntry) throws IOException {
       final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
-          PackedInts.bitsRequired(maxValue));
+          bitsRequired(maxValue));
       final int limit = docCount > docToEntry.length ? docToEntry.length
           : docCount;
       assert toEntry.length >= limit -1;
@@ -548,6 +549,21 @@
       w.finish();
     }
 
+    /**
+     * Computes the bits per value used by the packed ints writers of this
+     * class for values up to <code>maxValue</code>. When
+     * <code>optimizePackedForSpeed</code> is set the minimal width is passed
+     * through <code>PackedInts.getNextFixedSize</code>; otherwise the minimal
+     * width is used as is.
+     */
+    protected int bitsRequired(long maxValue) {
+      final int minimalBits = PackedInts.bitsRequired(maxValue);
+      if (optimizePackedForSpeed) {
+        return PackedInts.getNextFixedSize(minimalBits);
+      }
+      return minimalBits;
+    }
+
   }
 
   static abstract class BytesSortedSourceBase extends SortedSource {
Index: lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java	(revision 1180123)
+++ lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java	(working copy)
@@ -51,6 +51,7 @@
         Counter bytesUsed, IOContext context) throws IOException {
       super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
       this.comp = comp;
+      this.optimizePackedForSpeed = true; // bitsRequired(long) then applies PackedInts.getNextFixedSize
     }
 
     // Important that we get docCount, in case there were
Index: lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
===================================================================
--- lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java	(revision 1180123)
+++ lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java	(working copy)
@@ -53,6 +53,7 @@
       super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context);
       this.comp = comp;
       size = 0;
+      this.optimizePackedForSpeed = true; // bitsRequired(long) then applies PackedInts.getNextFixedSize
     }
 
     @Override
@@ -90,7 +91,7 @@
       writeIndex(idxOut, docCount, count, index, docToEntry);
       // next ord (0-based) -> offset
       PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
-          PackedInts.bitsRequired(offset));
+          bitsRequired(offset));
       for (int i = 0; i < count; i++) {
         offsetWriter.add(offsets[i]);
       }
Index: modules/grouping/src/java/org/apache/lucene/search/grouping/indexdocvalues/IndexDocValuesAllGroupsCollector.java
===================================================================
--- modules/grouping/src/java/org/apache/lucene/search/grouping/indexdocvalues/IndexDocValuesAllGroupsCollector.java	(revision 0)
+++ modules/grouping/src/java/org/apache/lucene/search/grouping/indexdocvalues/IndexDocValuesAllGroupsCollector.java
(revision 0)
@@ -0,0 +1,220 @@
+package org.apache.lucene.search.grouping.indexdocvalues;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
+import org.apache.lucene.search.grouping.SentinelIntSet;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Implementation of {@link AbstractAllGroupsCollector} that groups documents based on
+ * {@link IndexDocValues} fields.
+ *
+ * @param <GROUP_VALUE_TYPE> The type of the group values that are collected
+ */
+public abstract class IndexDocValuesAllGroupsCollector<GROUP_VALUE_TYPE> extends AbstractAllGroupsCollector<GROUP_VALUE_TYPE> {
+
+  /**
+   * Selects and constructs the all groups collector implementation that matches the
+   * {@link ValueType} of the {@link IndexDocValues} field to group by.
+   *
+   * @param groupField  The field to group by
+   * @param type        The {@link ValueType} which is used to select a concrete implementation.
+   * @param initialSize The initial allocation size of the
+   *                    internal int set and group list
+   *                    which should roughly match the total
+   *                    number of expected unique groups. Be aware that the
+   *                    heap usage is 4 bytes * initialSize. Not all concrete implementations use this!
+   * @return the all groups collector implementation for grouping by {@link IndexDocValues}.
+   *         The raw type is returned because the group value type depends on <code>type</code>.
+   * @throws IllegalArgumentException if <code>type</code> is not supported
+   */
+  public static IndexDocValuesAllGroupsCollector create(String groupField, ValueType type, int initialSize) {
+    switch (type) {
+      case VAR_INTS:
+      case FIXED_INTS_8:
+      case FIXED_INTS_16:
+      case FIXED_INTS_32:
+      case FIXED_INTS_64:
+        return new Lng(groupField);
+      case FLOAT_32:
+      case FLOAT_64:
+        return new Dbl(groupField);
+      case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_DEREF:
+      case BYTES_VAR_STRAIGHT:
+      case BYTES_VAR_DEREF:
+        return new BR(groupField);
+      case BYTES_VAR_SORTED:
+      case BYTES_FIXED_SORTED:
+        return new SortedBR(groupField, initialSize);
+      default:
+        throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+    }
+  }
+
+  final String groupField;
+  final Collection<GROUP_VALUE_TYPE> groups;
+
+  IndexDocValuesAllGroupsCollector(String groupField, Collection<GROUP_VALUE_TYPE> groups) {
+    this.groupField = groupField;
+    this.groups = groups;
+  }
+
+  /** Collects the distinct values read through <code>Source.getInt(int)</code> in a sorted set. */
+  static class Lng extends IndexDocValuesAllGroupsCollector<Long> {
+
+    private IndexDocValues.Source source;
+
+    Lng(String groupField) {
+      super(groupField, new TreeSet<Long>());
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      long value = source.getInt(doc);
+      // The backing TreeSet ignores values it already holds, so no contains() lookup is needed.
+      groups.add(value);
+    }
+
+    @Override
+    public Collection<Long> getGroups() {
+      return groups;
+    }
+
+    @Override
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = context.reader.perDocValues().docValues(groupField).getSource();
+    }
+
+  }
+
+  /** Collects the distinct values read through <code>Source.getFloat(int)</code> in a sorted set. */
+  static class Dbl extends IndexDocValuesAllGroupsCollector<Double> {
+
+    private IndexDocValues.Source source;
+
+    Dbl(String groupField) {
+      super(groupField, new TreeSet<Double>());
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      double value = source.getFloat(doc);
+      // The backing TreeSet ignores values it already holds, so no contains() lookup is needed.
+      groups.add(value);
+    }
+
+    @Override
+    public Collection<Double> getGroups() {
+      return groups;
+    }
+
+    @Override
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = context.reader.perDocValues().docValues(groupField).getSource();
+    }
+
+  }
+
+  /** Collects the distinct values read through <code>Source.getBytes(int, BytesRef)</code> in a sorted set. */
+  static class BR extends IndexDocValuesAllGroupsCollector<BytesRef> {
+
+    private final BytesRef spare = new BytesRef();
+
+    private IndexDocValues.Source source;
+
+    BR(String groupField) {
+      super(groupField, new TreeSet<BytesRef>());
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      BytesRef value = source.getBytes(doc, spare);
+      if (!groups.contains(value)) {
+        // Store a copy: the value was read with the spare instance that is reused for every document.
+        groups.add(new BytesRef(value));
+      }
+    }
+
+    @Override
+    public Collection<BytesRef> getGroups() {
+      return groups;
+    }
+
+    @Override
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = context.reader.perDocValues().docValues(groupField).getSource();
+    }
+
+  }
+
+  /**
+   * Collects the distinct values of a field that is read through a sorted source. Values that were
+   * already collected are recognized by their ord in the current source instead of by comparing bytes.
+   */
+  static class SortedBR extends IndexDocValuesAllGroupsCollector<BytesRef> {
+
+    private final SentinelIntSet ordSet;
+    private final BytesRef spare = new BytesRef();
+
+    private IndexDocValues.SortedSource source;
+
+    SortedBR(String groupField, int initialSize) {
+      super(groupField, new ArrayList<BytesRef>(initialSize));
+      ordSet = new SentinelIntSet(initialSize, -1);
+    }
+
+    @Override
+    public void collect(int doc) throws IOException {
+      int ord = source.ord(doc);
+      if (!ordSet.exists(ord)) {
+        ordSet.put(ord);
+        // Ord 0 is collected as the null group; any other ord gets its own BytesRef instance.
+        BytesRef value = ord == 0 ? null : source.getBytes(doc, new BytesRef());
+        groups.add(value);
+      }
+    }
+
+    @Override
+    public Collection<BytesRef> getGroups() {
+      return groups;
+    }
+
+    @Override
+    public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+      source = context.reader.perDocValues().docValues(groupField).getSource().asSortedSource();
+      // Look up the groups collected so far in the new source, so that collect(int)
+      // keeps recognizing them by ord. Groups the new source does not know are skipped.
+      ordSet.clear();
+      for (BytesRef countedGroup : groups) {
+        int ord = countedGroup == null ? 0 : source.getByValue(countedGroup, spare);
+        if (ord >= 0) {
+          ordSet.put(ord);
+        }
+      }
+    }
+  }
+
+}

Property changes on: modules/grouping/src/java/org/apache/lucene/search/grouping/indexdocvalues/IndexDocValuesAllGroupsCollector.java
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Date Author Id Revision HeadURL
