Index: backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java =================================================================== --- backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (revision 928371) +++ backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (working copy) @@ -1,73 +0,0 @@ -package org.apache.lucene.analysis; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; - -public class TestNumericTokenStream extends BaseTokenStreamTestCase { - - static final long lvalue = 4573245871874382L; - static final int ivalue = 123456; - - public void testLongStream() throws Exception { - final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); - final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); - for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testIntStream() throws Exception { - final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); - final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); - for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testNotInitialized() throws Exception { - final NumericTokenStream stream=new NumericTokenStream(); - - try { - stream.reset(); - fail("reset() should not succeed."); - } catch (IllegalStateException e) { - // pass - } - - try { - stream.incrementToken(); - fail("incrementToken() should not succeed."); - } catch (IllegalStateException e) { - // pass - } - } - -} Index: src/java/org/apache/lucene/analysis/NumericTokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 928371) +++ src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -91,7 +96,89 @@ /** The lower precision tokens gets this token type assigned. */ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /** Expert: Use this attribute to get the details of the currently generated token + * @lucene.experimental + * @since 3.1 + */ + public interface NumericTermAttribute extends Attribute { + /** Returns current shift value, undefined before first token */ + int getShift(); + /** Returns {@link NumericTokenStream}'s raw value as {@code long} */ + long getRawValue(); + /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */ + int getValueSize(); + } + + private static final class NumericAttributeFactory extends AttributeFactory { + private final AttributeFactory delegate; + private NumericTokenStream ts = null; + NumericAttributeFactory(AttributeFactory delegate) { + this.delegate = delegate; + } + + @Override + public AttributeImpl createAttributeInstance(Class attClass) { + if (attClass == NumericTermAttribute.class) + return new NumericTermAttributeImpl(ts); + if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class)) + throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute."); + return delegate.createAttributeInstance(attClass); + } + } + + private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { + private final NumericTokenStream ts; + + public NumericTermAttributeImpl(NumericTokenStream ts) { + this.ts = ts; + } + + public int toBytesRef(BytesRef bytes) { + try { + assert ts.valSize == 64 || ts.valSize == 32; + return (ts.valSize == 64) ? 
+ NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) : + NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes); + } catch (IllegalArgumentException iae) { + // return empty token before first + bytes.length = 0; + return 0; + } + } + + public int getShift() { return ts.shift; } + public long getRawValue() { return ts.value; } + public int getValueSize() { return ts.valSize; } + + @Override + public void clear() { + // this attribute has no contents to clear + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public void copyTo(AttributeImpl target) { + // this attribute has no contents to copy + } + + @Override + public Object clone() { + // cannot throw CloneNotSupportedException (checked) + throw new UnsupportedOperationException(); + } + } + /** * Creates a token stream for numeric values using the default precisionStep * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, @@ -107,23 +194,15 @@ * before using set a value using the various set???Value() methods. */ public NumericTokenStream(final int precisionStep) { - super(); - this.precisionStep = precisionStep; - if (precisionStep < 1) - throw new IllegalArgumentException("precisionStep must be >=1"); - } + super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); - /** - * Expert: Creates a token stream for numeric values with the specified - * precisionStep using the given {@link AttributeSource}. - * The stream is not yet initialized, - * before using set a value using the various set???Value() methods. - */ - public NumericTokenStream(AttributeSource source, final int precisionStep) { - super(source); this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -134,10 +213,15 @@ * before using set a value using the various set???Value() methods. 
*/ public NumericTokenStream(AttributeFactory factory, final int precisionStep) { - super(factory); + super(new NumericAttributeFactory(factory)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); + this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -149,7 +233,7 @@ public NumericTokenStream setLongValue(final long value) { this.value = value; valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -162,7 +246,7 @@ public NumericTokenStream setIntValue(final int value) { this.value = value; valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -175,7 +259,7 @@ public NumericTokenStream setDoubleValue(final double value) { this.value = NumericUtils.doubleToSortableLong(value); valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -188,7 +272,7 @@ public NumericTokenStream setFloatValue(final float value) { this.value = NumericUtils.floatToSortableInt(value); valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -196,37 +280,24 @@ public void reset() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - shift = 0; + shift = -precisionStep; } @Override public boolean incrementToken() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - if (shift >= valSize) + shift += precisionStep; + if (shift >= valSize) { + // reset so the attribute still works after exhausted stream + shift -= precisionStep; return false; + } clearAttributes(); - final char[] buffer; - switch (valSize) { - case 64: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG); - termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer)); - break; - - case 32: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT); - termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer)); - break; - - default: - // should not happen - throw new IllegalArgumentException("valSize must be 32 or 64"); - } - + // the TermToBytesRefAttribute is directly accessing shift & value. typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((shift == 0) ? 
1 : 0); - shift += precisionStep; return true; } @@ -238,12 +309,11 @@ } // members - private final TermAttribute termAtt = addAttribute(TermAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - private int shift = 0, valSize = 0; // valSize==0 means not initialized + int shift, valSize = 0; // valSize==0 means not initialized private final int precisionStep; - private long value = 0L; + long value = 0L; } Index: src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 928371) +++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -379,9 +379,9 @@ */ private final class NumericRangeTermsEnum extends FilteredTermsEnum { - private final BytesRef currentLowerBound = new BytesRef(), currentUpperBound = new BytesRef(); + private BytesRef currentLowerBound, currentUpperBound; - private final LinkedList rangeBounds = new LinkedList(); + private final LinkedList rangeBounds = new LinkedList(); private final Comparator termComp; NumericRangeTermsEnum(final IndexReader reader) throws IOException { @@ -414,7 +414,7 @@ NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() { @Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } @@ -449,7 +449,7 @@ NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() { @Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } @@ -470,11 +470,11 @@ if (rangeBounds.size() >= 2) { assert rangeBounds.size() % 2 == 0; - this.currentLowerBound.copy(rangeBounds.removeFirst()); - assert termComp.compare(currentUpperBound, currentLowerBound) <= 0 : + this.currentLowerBound = rangeBounds.removeFirst(); + assert currentUpperBound == null || termComp.compare(currentUpperBound, currentLowerBound) <= 0 : "The current upper bound must be <= the new lower bound"; - this.currentUpperBound.copy(rangeBounds.removeFirst()); + this.currentUpperBound = rangeBounds.removeFirst(); return currentLowerBound; } Index: src/java/org/apache/lucene/util/NumericUtils.java =================================================================== --- src/java/org/apache/lucene/util/NumericUtils.java (revision 928371) +++ src/java/org/apache/lucene/util/NumericUtils.java (working copy) @@ -32,10 +32,10 @@ * more exactly. This reduces the number of terms dramatically. * *
<p>
This class generates terms to achieve this: First the numerical integer values need to - * be converted to strings. For that integer values (32 bit or 64 bit) are made unsigned - * and the bits are converted to ASCII chars with each 7 bit. The resulting string is - * sortable like the original integer value. Each value is also prefixed - * (in the first char) by the shift value (number of bits removed) used + * be converted to bytes. For that integer values (32 bit or 64 bit) are made unsigned + * and the bits are converted to ASCII chars with each 7 bit. The resulting byte[] is + * sortable like the original integer value (even using UTF-8 sort order). Each value is also + * prefixed (in the first char) by the shift value (number of bits removed) used * during encoding. * *
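As a rough illustration of the encoding just described (shift stored in the first byte, then 7 value bits per byte), the following sketch round-trips a long through the BytesRef-based methods this patch introduces. It is illustrative only and not part of the patch; the class name and sample value are invented, while the NumericUtils methods and constants are the ones added/kept here:

  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.NumericUtils;

  public class PrefixCodedSketch {
    public static void main(String[] args) {
      // invented sample value (same one the unit tests use)
      final long value = 4573245871874382L;
      final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
      for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT) {
        // first byte stores SHIFT_START_LONG + shift, remaining bytes carry 7 value bits each
        NumericUtils.longToPrefixCoded(value, shift, bytes);
        // lower-precision terms decode with the shifted-away (rightmost) bits zeroed
        final long decoded = NumericUtils.prefixCodedToLong(bytes);
        assert decoded == (value & ~((1L << shift) - 1L));
      }
    }
  }

Because the byte[] keeps the same sort order as the original unsigned value, such prefix-coded terms remain usable for range splitting and sorting, as the surrounding javadoc notes.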
<p>
To also index floating point numbers, this class supplies two methods to convert them @@ -51,13 +51,13 @@ * {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part * for the same data types. * - *
<p>
This class can also be used, to generate lexicographically sortable (according - * {@link String#compareTo(String)}) representations of numeric data types for other - * usages (e.g. sorting). + *
<p>
This class can also be used, to generate lexicographically sortable (according to + * {@link BytesRef#getUTF8SortedAsUTF16Comparator()}) representations of numeric data + * types for other usages (e.g. sorting). * * @lucene.experimental * - * @since 2.9 + * @since 2.9, API changed non backwards-compliant in 3.1 */ public final class NumericUtils { @@ -71,27 +71,27 @@ /** * Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is - * stored as SHIFT_START_LONG+shift in the first character + * stored as SHIFT_START_LONG+shift in the first byte */ - public static final char SHIFT_START_LONG = (char)0x20; + public static final byte SHIFT_START_LONG = 0x20; /** - * Expert: The maximum term length (used for char[] buffer size) + * Expert: The maximum term length (used for byte[] buffer size) * for encoding long values. - * @see #longToPrefixCoded(long,int,char[]) + * @see #longToPrefixCoded(long,int,BytesRef) */ public static final int BUF_SIZE_LONG = 63/7 + 2; /** * Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is - * stored as SHIFT_START_INT+shift in the first character + * stored as SHIFT_START_INT+shift in the first byte */ - public static final char SHIFT_START_INT = (char)0x60; + public static final byte SHIFT_START_INT = 0x60; /** - * Expert: The maximum term length (used for char[] buffer size) + * Expert: The maximum term length (used for byte[] buffer size) * for encoding int values. - * @see #intToPrefixCoded(int,int,char[]) + * @see #intToPrefixCoded(int,int,BytesRef) */ public static final int BUF_SIZE_INT = 31/7 + 2; @@ -100,25 +100,33 @@ * This is method is used by {@link NumericTokenStream}. * @param val the numeric value * @param shift how many bits to strip from the right - * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} - * length - * @return number of chars written to buffer + * @param bytes will contain the encoded value + * @return the hash code for indexing (TermsHash) */ - public static int longToPrefixCoded(final long val, final int shift, final char[] buffer) { + public static int longToPrefixCoded(final long val, final int shift, final BytesRef bytes) { if (shift>63 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..63"); - int nChars = (63-shift)/7 + 1, len = nChars+1; - buffer[0] = (char)(SHIFT_START_LONG + shift); + if (bytes.bytes == null) { + bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG]; + } else if (bytes.bytes.length < NumericUtils.BUF_SIZE_LONG) { + bytes.grow(NumericUtils.BUF_SIZE_LONG); + } + int nChars = (63-shift)/7 + 1; + bytes.length = nChars+1; + int hash = (bytes.bytes[0] = (byte) (SHIFT_START_LONG + shift)); long sortableBits = val ^ 0x8000000000000000L; sortableBits >>>= shift; while (nChars>=1) { - // Store 7 bits per character for good efficiency when UTF-8 encoding. - // The whole number is right-justified so that lucene can prefix-encode - // the terms more efficiently. - buffer[nChars--] = (char)(sortableBits & 0x7f); + // Store 7 bits per byte for compatibility + // with UTF-8 encoding of terms + bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - return len; + // TODO: optimize this to do it in above loop + for (int i = 1; i < bytes.length; i++) { + hash = 31*hash + bytes.bytes[i]; + } + return hash; } /** @@ -126,11 +134,13 @@ * This is method is used by {@link LongRangeBuilder}. 
* @param val the numeric value * @param shift how many bits to strip from the right + * @deprecated This method is no longer needed! */ + @Deprecated public static String longToPrefixCoded(final long val, final int shift) { - final char[] buffer = new char[BUF_SIZE_LONG]; - final int len = longToPrefixCoded(val, shift, buffer); - return new String(buffer, 0, len); + final BytesRef buffer = new BytesRef(BUF_SIZE_LONG); + longToPrefixCoded(val, shift, buffer); + return buffer.utf8ToString(); } /** @@ -138,7 +148,9 @@ * reducing the precision. It can be used to store the full precision value as a * stored field in index. *
<p>
To decode, use {@link #prefixCodedToLong}. + * @deprecated This method is no longer needed! */ + @Deprecated public static String longToPrefixCoded(final long val) { return longToPrefixCoded(val, 0); } @@ -148,25 +160,33 @@ * This is method is used by {@link NumericTokenStream}. * @param val the numeric value * @param shift how many bits to strip from the right - * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} - * length - * @return number of chars written to buffer + * @param bytes will contain the encoded value + * @return the hash code for indexing (TermsHash) */ - public static int intToPrefixCoded(final int val, final int shift, final char[] buffer) { + public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) { if (shift>31 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..31"); - int nChars = (31-shift)/7 + 1, len = nChars+1; - buffer[0] = (char)(SHIFT_START_INT + shift); + if (bytes.bytes == null) { + bytes.bytes = new byte[NumericUtils.BUF_SIZE_INT]; + } else if (bytes.bytes.length < NumericUtils.BUF_SIZE_INT) { + bytes.grow(NumericUtils.BUF_SIZE_INT); + } + int nChars = (31-shift)/7 + 1; + bytes.length = nChars+1; + int hash = (bytes.bytes[0] = (byte)(SHIFT_START_INT + shift)); int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars>=1) { - // Store 7 bits per character for good efficiency when UTF-8 encoding. - // The whole number is right-justified so that lucene can prefix-encode - // the terms more efficiently. - buffer[nChars--] = (char)(sortableBits & 0x7f); + // Store 7 bits per byte for compatibility + // with UTF-8 encoding of terms + bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - return len; + // TODO: optimize this to do it in above loop + for (int i = 1; i < bytes.length; i++) { + hash = 31*hash + bytes.bytes[i]; + } + return hash; } /** @@ -174,11 +194,13 @@ * This is method is used by {@link IntRangeBuilder}. * @param val the numeric value * @param shift how many bits to strip from the right + * @deprecated This method is no longer needed! */ + @Deprecated public static String intToPrefixCoded(final int val, final int shift) { - final char[] buffer = new char[BUF_SIZE_INT]; - final int len = intToPrefixCoded(val, shift, buffer); - return new String(buffer, 0, len); + final BytesRef buffer = new BytesRef(BUF_SIZE_INT); + intToPrefixCoded(val, shift, buffer); + return buffer.utf8ToString(); } /** @@ -186,7 +208,9 @@ * reducing the precision. It can be used to store the full precision value as a * stored field in index. *
<p>
To decode, use {@link #prefixCodedToInt}. + * @deprecated This method is no longer needed! */ + @Deprecated public static String intToPrefixCoded(final int val) { return intToPrefixCoded(val, 0); } @@ -198,42 +222,36 @@ * @throws NumberFormatException if the supplied string is * not correctly prefix encoded. * @see #longToPrefixCoded(long) + * @deprecated This method is no longer needed! */ + @Deprecated public static long prefixCodedToLong(final String prefixCoded) { - final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG; - if (shift>63 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)"); - long sortableBits = 0L; - for (int i=1, len=prefixCoded.length(); i0x7f) { - throw new NumberFormatException( - "Invalid prefixCoded numerical value representation (char "+ - Integer.toHexString(ch)+" at position "+i+" is invalid)" - ); - } - sortableBits |= ch; - } - return (sortableBits << shift) ^ 0x8000000000000000L; + return prefixCodedToLong(new BytesRef(prefixCoded)); } + /** + * Returns a long from prefixCoded bytes. + * Rightmost bits will be zero for lower precision codes. + * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied {@link BytesRef} is + * not correctly prefix encoded. + * @see #longToPrefixCoded(long) + */ public static long prefixCodedToLong(final BytesRef term) { final int shift = term.bytes[term.offset]-SHIFT_START_LONG; if (shift>63 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)"); long sortableBits = 0L; - final int limit = term.offset + term.length; - for (int i=term.offset+1; i31 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); - int sortableBits = 0; - for (int i=1, len=prefixCoded.length(); i0x7f) { - throw new NumberFormatException( - "Invalid prefixCoded numerical value representation (char "+ - Integer.toHexString(ch)+" at position "+i+" is invalid)" - ); - } - sortableBits |= ch; - } - return (sortableBits << shift) ^ 0x80000000; + return prefixCodedToInt(new BytesRef(prefixCoded)); } + /** + * Returns an int from prefixCoded bytes. + * Rightmost bits will be zero for lower precision codes. + * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied {@link BytesRef} is + * not correctly prefix encoded. + * @see #intToPrefixCoded(int) + */ public static int prefixCodedToInt(final BytesRef term) { final int shift = term.bytes[term.offset]-SHIFT_START_INT; if (shift>31 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)"); int sortableBits = 0; - final int limit = term.offset + term.length; - for (int i=term.offset+1; iThis method is used by {@link NumericRangeQuery}. */ @@ -379,7 +399,7 @@ * Expert: Splits an int range recursively. * You may implement a builder that adds clauses to a * {@link org.apache.lucene.search.BooleanQuery} for each call to its - * {@link IntRangeBuilder#addRange(String,String)} + * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)} * method. *
<p>
This method is used by {@link NumericRangeQuery}. */ @@ -454,8 +474,9 @@ /** * Expert: Callback for {@link #splitLongRange}. * You need to overwrite only one of the methods. - *
<p>
NOTE: This is a very low-level interface, - * the method signatures may change in later versions. + * @lucene.experimental NOTE: This is a very low-level interface, + * the method signatures may change in later versions. + * @since 2.9, API changed non backwards-compliant in 3.1 */ public static abstract class LongRangeBuilder { @@ -463,7 +484,7 @@ * Overwrite this method, if you like to receive the already prefix encoded range bounds. * You can directly build classical (inclusive) range queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded) { + public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -472,7 +493,10 @@ * You can use this for e.g. debugging purposes (print out range bounds). */ public void addRange(final long min, final long max, final int shift) { - addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift)); + final BytesRef minBytes = new BytesRef(BUF_SIZE_LONG), maxBytes = new BytesRef(BUF_SIZE_LONG); + longToPrefixCoded(min, shift, minBytes); + longToPrefixCoded(max, shift, maxBytes); + addRange(minBytes, maxBytes); } } @@ -480,8 +504,9 @@ /** * Expert: Callback for {@link #splitIntRange}. * You need to overwrite only one of the methods. - *
<p>
NOTE: This is a very low-level interface, - * the method signatures may change in later versions. + * @lucene.experimental NOTE: This is a very low-level interface, + * the method signatures may change in later versions. + * @since 2.9, API changed non backwards-compliant in 3.1 */ public static abstract class IntRangeBuilder { @@ -489,7 +514,7 @@ * Overwrite this method, if you like to receive the already prefix encoded range bounds. * You can directly build classical range (inclusive) queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded) { + public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -498,7 +523,10 @@ * You can use this for e.g. debugging purposes (print out range bounds). */ public void addRange(final int min, final int max, final int shift) { - addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift)); + final BytesRef minBytes = new BytesRef(BUF_SIZE_INT), maxBytes = new BytesRef(BUF_SIZE_INT); + intToPrefixCoded(min, shift, minBytes); + intToPrefixCoded(max, shift, maxBytes); + addRange(minBytes, maxBytes); } } Index: src/test/org/apache/lucene/analysis/TestNumericTokenStream.java =================================================================== --- src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (revision 928371) +++ src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (working copy) @@ -17,8 +17,9 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TestNumericTokenStream extends BaseTokenStreamTestCase { @@ -29,27 +30,47 @@ public void testLongStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue); // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class); final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); + final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); + final BytesRef bytes = new BytesRef(); + stream.reset(); + assertEquals(64, numericAtt.getValueSize()); + assertEquals(lvalue, numericAtt.getRawValue()); for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); + assertEquals("Shift value wrong", shift, numericAtt.getShift()); + final int hash = bytesAtt.toBytesRef(bytes); + assertEquals("Hash incorrect", bytes.hashCode(), hash); + assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes)); + assertEquals("Type incorrect", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } - assertFalse("No more tokens available", stream.incrementToken()); + assertFalse("More tokens available", stream.incrementToken()); + stream.end(); + stream.close(); } public void testIntStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class); final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); + final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); + final BytesRef bytes = new BytesRef(); + stream.reset(); + assertEquals(32, numericAtt.getValueSize()); + assertEquals(ivalue, numericAtt.getRawValue()); for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); + assertEquals("Shift value wrong", shift, numericAtt.getShift()); + final int hash = bytesAtt.toBytesRef(bytes); + assertEquals("Hash incorrect", bytes.hashCode(), hash); + assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes)); + assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } - assertFalse("No more tokens available", stream.incrementToken()); + assertFalse("More tokens available", stream.incrementToken()); + stream.end(); + stream.close(); } public void testNotInitialized() throws Exception {
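The updated tests above suggest the consumption pattern a caller would follow after this change: fetch TermToBytesRefAttribute and the new NumericTermAttribute, reset the stream, and pull prefix-coded bytes per token. A hedged usage sketch, using only attributes and methods that appear in this patch (the class name and printed fields are invented for illustration):

  import org.apache.lucene.analysis.NumericTokenStream;
  import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.NumericUtils;

  public class ConsumeNumericTokenStream {
    public static void main(String[] args) throws Exception {
      final NumericTokenStream stream = new NumericTokenStream().setLongValue(4573245871874382L);
      final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
      final NumericTokenStream.NumericTermAttribute numericAtt =
          stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
      final BytesRef bytes = new BytesRef();
      stream.reset();
      while (stream.incrementToken()) {
        final int hash = bytesAtt.toBytesRef(bytes);   // fills bytes, returns the indexing hash
        final long prefixValue = NumericUtils.prefixCodedToLong(bytes);
        System.out.println("shift=" + numericAtt.getShift()
            + " hash=" + hash + " prefixValue=" + prefixValue);
      }
      stream.end();
      stream.close();
    }
  }

Note that, as the factory in this patch enforces, CharTermAttribute/TermAttribute can no longer be requested from a NumericTokenStream; consumers are expected to go through TermToBytesRefAttribute instead.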