Index: lucene/src/test/org/apache/lucene/index/values/TestDocValues.java =================================================================== --- lucene/src/test/org/apache/lucene/index/values/TestDocValues.java (revision 1187902) +++ lucene/src/test/org/apache/lucene/index/values/TestDocValues.java (revision ) @@ -56,7 +56,7 @@ Directory dir = newDirectory(); final Counter trackBytes = Counter.newCounter(); - Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random)); + Writer w = Bytes.getWriter(dir, "test", mode, fixedSize, COMP, trackBytes, newIOContext(random), random.nextBoolean()); int maxDoc = 220; final String[] values = new String[maxDoc]; final int fixedLength = 1 + atLeast(50); Index: lucene/src/java/org/apache/lucene/index/values/Writer.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/Writer.java (revision 1187902) +++ lucene/src/java/org/apache/lucene/index/values/Writer.java (revision ) @@ -172,11 +172,12 @@ * the {@link Directory} to create the files from. * @param bytesUsed * a byte-usage tracking reference + * @param optimizeBytesDocValuesForSpeed Whether the space used for DV should be rounded up for higher lookup performance. 
* @return a new {@link Writer} instance for the given {@link ValueType} * @throws IOException */ public static Writer create(ValueType type, String id, Directory directory, - Comparator comp, Counter bytesUsed, IOContext context) throws IOException { + Comparator comp, Counter bytesUsed, IOContext context, boolean optimizeBytesDocValuesForSpeed) throws IOException { if (comp == null) { comp = BytesRef.getUTF8SortedAsUnicodeComparator(); } @@ -193,22 +194,22 @@ return Floats.getWriter(directory, id, bytesUsed, context, type); case BYTES_FIXED_STRAIGHT: return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); case BYTES_FIXED_DEREF: return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); case BYTES_FIXED_SORTED: return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); case BYTES_VAR_STRAIGHT: return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); case BYTES_VAR_DEREF: return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); case BYTES_VAR_SORTED: return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp, - bytesUsed, context); + bytesUsed, context, optimizeBytesDocValuesForSpeed); default: throw new IllegalArgumentException("Unknown Values: " + type); } Index: lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (revision 1187902) +++ lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (revision ) @@ -53,9 +53,10 @@ private 
final Comparator comp; public Writer(Directory dir, String id, Comparator comp, - Counter bytesUsed, IOContext context) throws IOException { + Counter bytesUsed, IOContext context, boolean optimizePackedForSpeed) throws IOException { super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context); this.comp = comp; + this.optimizePackedForSpeed = optimizePackedForSpeed; } @Override Index: lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java =================================================================== --- lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java (revision 1187902) +++ lucene/src/java/org/apache/lucene/index/codecs/DocValuesWriterBase.java (revision ) @@ -30,6 +30,7 @@ /** * Abstract base class for PerDocConsumer implementations + * * @lucene.experimental */ public abstract class DocValuesWriterBase extends PerDocConsumer { @@ -37,12 +38,23 @@ private final int codecId; private final Counter bytesUsed; private final IOContext context; + private final boolean sortedBytesFasterButMoreRam; protected DocValuesWriterBase(PerDocWriteState state) { + this(state, true); + } + + /** + * @param state The state to initiate a {@link PerDocConsumer} instance + * @param sortedBytesFasterButMoreRam whether docvalues of type sorted bytes should be optimized for speed by rounding + * up the bits used for a value to either 8, 16, 32 or 64 bits. 
+ */ + protected DocValuesWriterBase(PerDocWriteState state, boolean sortedBytesFasterButMoreRam) { this.segmentName = state.segmentName; this.codecId = state.codecId; this.bytesUsed = state.bytesUsed; this.context = state.context; + this.sortedBytesFasterButMoreRam = sortedBytesFasterButMoreRam; } protected abstract Directory getDirectory(); @@ -53,9 +65,8 @@ @Override public DocValuesConsumer addValuesField(FieldInfo field) throws IOException { - return Writer.create(field.getDocValues(), - docValuesId(segmentName, codecId, field.number), - getDirectory(), getComparator(), bytesUsed, context); + return Writer.create(field.getDocValues(), docValuesId(segmentName, codecId, field.number), + getDirectory(), getComparator(), bytesUsed, context, sortedBytesFasterButMoreRam); } public static String docValuesId(String segmentsName, int codecID, int fieldId) { Index: lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java (revision 1187902) +++ lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java (revision ) @@ -54,10 +54,11 @@ private final Comparator comp; public Writer(Directory dir, String id, Comparator comp, - Counter bytesUsed, IOContext context) throws IOException { + Counter bytesUsed, IOContext context, boolean optimizePackedForSpeed) throws IOException { super(dir, id, CODEC_NAME, VERSION_CURRENT, bytesUsed, context); this.comp = comp; size = 0; + this.optimizePackedForSpeed = optimizePackedForSpeed; } @Override public void merge(MergeState mergeState, IndexDocValues[] docValues) @@ -119,8 +120,7 @@ final int[] sortedEntries = hash.sort(comp); // total bytes of data idxOut.writeLong(maxBytes); - PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1, - PackedInts.bitsRequired(maxBytes)); + PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1, 
bitsRequired(maxBytes)); // first dump bytes data, recording index & write offset as // we go final BytesRef spare = new BytesRef(); Index: lucene/src/java/org/apache/lucene/index/values/Bytes.java =================================================================== --- lucene/src/java/org/apache/lucene/index/values/Bytes.java (revision 1187902) +++ lucene/src/java/org/apache/lucene/index/values/Bytes.java (revision ) @@ -32,17 +32,17 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool.Allocator; +import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CodecUtil; import org.apache.lucene.util.Counter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.PagedBytes; import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.ByteBlockPool.Allocator; -import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator; -import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray; import org.apache.lucene.util.packed.PackedInts; /** @@ -109,13 +109,15 @@ * {@link Writer}. A call to {@link Writer#finish(int)} will release * all internally used resources and frees the memory tracking * reference. + * @param sortedBytesFasterButMoreRam Whether the space used for DV should be rounded up for better lookup performance. * @param context * @return a new {@link Writer} instance * @throws IOException * if the files for the writer can not be created. 
*/ public static Writer getWriter(Directory dir, String id, Mode mode, - boolean fixedSize, Comparator sortComparator, Counter bytesUsed, IOContext context) + boolean fixedSize, Comparator sortComparator, + Counter bytesUsed, IOContext context, boolean sortedBytesFasterButMoreRam) throws IOException { // TODO -- i shouldn't have to specify fixed? can // track itself & do the write thing at write time? @@ -129,7 +131,7 @@ } else if (mode == Mode.DEREF) { return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context); } else if (mode == Mode.SORTED) { - return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context); + return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, sortedBytesFasterButMoreRam); } } else { if (mode == Mode.STRAIGHT) { @@ -137,7 +139,7 @@ } else if (mode == Mode.DEREF) { return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context); } else if (mode == Mode.SORTED) { - return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context); + return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, sortedBytesFasterButMoreRam); } } @@ -393,6 +395,7 @@ protected int lastDocId = -1; protected int[] docToEntry; protected final BytesRefHash hash; + protected boolean optimizePackedForSpeed = false; protected long maxBytes = 0; protected DerefBytesWriterBase(Directory dir, String id, String codecName, @@ -506,8 +509,7 @@ protected void writeIndex(IndexOutput idxOut, int docCount, long maxValue, int[] addresses, int[] toEntry) throws IOException { - final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, - PackedInts.bitsRequired(maxValue)); + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, bitsRequired(maxValue)); final int limit = docCount > docToEntry.length ? 
docToEntry.length : docCount; assert toEntry.length >= limit -1; @@ -530,8 +532,7 @@ protected void writeIndex(IndexOutput idxOut, int docCount, long maxValue, long[] addresses, int[] toEntry) throws IOException { - final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, - PackedInts.bitsRequired(maxValue)); + final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, bitsRequired(maxValue)); final int limit = docCount > docToEntry.length ? docToEntry.length : docCount; assert toEntry.length >= limit -1; @@ -551,9 +552,14 @@ } w.finish(); } - + + protected int bitsRequired(long maxValue){ + return optimizePackedForSpeed ? + PackedInts.getNextFixedSize(PackedInts.bitsRequired(maxValue)) : PackedInts.bitsRequired(maxValue); - } - + } + + } + static abstract class BytesSortedSourceBase extends SortedSource { private final PagedBytes pagedBytes;