Index: lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (revision 1433930) +++ lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (working copy) @@ -6,12 +6,11 @@ import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.DocValuesCategoryListIterator; import org.apache.lucene.facet.util.PartitionsUtils; -import org.apache.lucene.util.encoding.DGapIntEncoder; +import org.apache.lucene.util.encoding.DGapVInt8IntEncoder; import org.apache.lucene.util.encoding.IntDecoder; import org.apache.lucene.util.encoding.IntEncoder; import org.apache.lucene.util.encoding.SortingIntEncoder; import org.apache.lucene.util.encoding.UniqueValuesIntEncoder; -import org.apache.lucene.util.encoding.VInt8IntEncoder; /* * Licensed to the Apache Software Foundation (ASF) under one or more @@ -78,7 +77,9 @@ * counting facets. 
*/ public IntEncoder createEncoder() { - return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); + // nocommit +// return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); + return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())); } @Override Index: lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java (working copy) @@ -0,0 +1,67 @@ +package org.apache.lucene.util.encoding; + +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.RamUsageEstimator; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Decodes values encoded by {@link DGapVInt8IntEncoder}. 
+ * + * @lucene.experimental + */ +public final class DGapVInt8IntDecoder extends IntDecoder { + + @Override + public void decode(BytesRef buf, IntsRef values) { + values.offset = values.length = 0; + + // grow the buffer up front, even if by a large number of values (buf.length) + // that saves the need to check inside the loop for every decoded value if + // the buffer needs to grow. + if (values.ints.length < buf.length) { + values.ints = new int[ArrayUtil.oversize(buf.length, RamUsageEstimator.NUM_BYTES_INT)]; + } + + // it is better if the decoding is inlined like so, and not e.g. + // in a utility method + int upto = buf.offset + buf.length; + int value = 0; + int offset = buf.offset; + int prev = 0; + while (offset < upto) { + byte b = buf.bytes[offset++]; + if (b >= 0) { + values.ints[values.length] = ((value << 7) | b) + prev; + value = 0; + prev = values.ints[values.length]; + values.length++; + } else { + value = (value << 7) | (b & 0x7F); + } + } + } + + @Override + public String toString() { + return "DGapVInt8"; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntDecoder.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java =================================================================== --- lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java (revision 0) +++ lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java (working copy) @@ -0,0 +1,89 @@ +package org.apache.lucene.util.encoding; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * An {@link IntEncoder} which implements variable length encoding for the gap + * between values. It's a specialized form of the combination of + * {@link DGapIntEncoder} and {@link VInt8IntEncoder}. + * + * @see VInt8IntEncoder + * @see DGapIntEncoder + * + * @lucene.experimental + */ +public final class DGapVInt8IntEncoder extends IntEncoder { + + @Override + public void encode(IntsRef values, BytesRef buf) { + buf.offset = buf.length = 0; + int maxBytesNeeded = 5 * values.length; // at most 5 bytes per VInt + if (buf.bytes.length < maxBytesNeeded) { + buf.grow(maxBytesNeeded); + } + + int upto = values.offset + values.length; + int prev = 0; + for (int i = values.offset; i < upto; i++) { + // it is better if the encoding is inlined like so, and not e.g. 
+ // in a utility method + int value = values.ints[i] - prev; + if ((value & ~0x7F) == 0) { + buf.bytes[buf.length] = (byte) value; + buf.length++; + } else if ((value & ~0x3FFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 1] = (byte) (value & 0x7F); + buf.length += 2; + } else if ((value & ~0x1FFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 2] = (byte) (value & 0x7F); + buf.length += 3; + } else if ((value & ~0xFFFFFFF) == 0) { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 3] = (byte) (value & 0x7F); + buf.length += 4; + } else { + buf.bytes[buf.length] = (byte) (0x80 | ((value & 0xF0000000) >> 28)); + buf.bytes[buf.length + 1] = (byte) (0x80 | ((value & 0xFE00000) >> 21)); + buf.bytes[buf.length + 2] = (byte) (0x80 | ((value & 0x1FC000) >> 14)); + buf.bytes[buf.length + 3] = (byte) (0x80 | ((value & 0x3F80) >> 7)); + buf.bytes[buf.length + 4] = (byte) (value & 0x7F); + buf.length += 5; + } + prev = values.ints[i]; + } + } + + @Override + public IntDecoder createMatchingDecoder() { + return new DGapVInt8IntDecoder(); + } + + @Override + public String toString() { + return "DGapVInt8"; + } + +} Property changes on: lucene/facet/src/java/org/apache/lucene/util/encoding/DGapVInt8IntEncoder.java ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Index: lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java 
(revision 1433930) +++ lucene/facet/src/test/org/apache/lucene/facet/index/params/CategoryListParamsTest.java (working copy) @@ -1,12 +1,11 @@ package org.apache.lucene.facet.index.params; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.encoding.DGapIntEncoder; +import org.apache.lucene.util.encoding.DGapVInt8IntEncoder; import org.apache.lucene.util.encoding.IntDecoder; import org.apache.lucene.util.encoding.IntEncoder; import org.apache.lucene.util.encoding.SortingIntEncoder; import org.apache.lucene.util.encoding.UniqueValuesIntEncoder; -import org.apache.lucene.util.encoding.VInt8IntEncoder; import org.junit.Test; /* @@ -32,7 +31,7 @@ public void testDefaultSettings() { CategoryListParams clp = new CategoryListParams(); assertEquals("wrong default field", "$facets", clp.field); - IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))); + IntEncoder encoder = new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())); IntDecoder decoder = encoder.createMatchingDecoder(); assertEquals("unexpected default encoder", encoder.toString(), clp.createEncoder().toString()); assertEquals("unexpected default decoder", decoder.toString(), clp.createEncoder().createMatchingDecoder().toString()); Index: lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java (revision 1433930) +++ lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java (working copy) @@ -77,6 +77,7 @@ encoderTest(new VInt8IntEncoder(), facetIDs, loopFactor); encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new VInt8IntEncoder())), facetIDs, loopFactor); encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))), facetIDs, loopFactor); + encoderTest(new SortingIntEncoder(new 
UniqueValuesIntEncoder(new DGapVInt8IntEncoder())), facetIDs, loopFactor); encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new EightFlagsIntEncoder()))), facetIDs, loopFactor); encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new FourFlagsIntEncoder()))), facetIDs, loopFactor); encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(3)))), facetIDs, loopFactor); Index: lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java =================================================================== --- lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java (revision 1433930) +++ lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java (working copy) @@ -150,5 +150,10 @@ public void testSortingUniqueDGapNOnes3() throws Exception { encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(3)))), data, uniqueSortedData); } + + @Test + public void testSortingUniqueDGapVInt() throws Exception { + encoderTest(new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())), data, uniqueSortedData); + } }