Index: backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java =================================================================== --- backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (revision 930707) +++ backwards/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (working copy) @@ -1,73 +0,0 @@ -package org.apache.lucene.analysis; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; - -public class TestNumericTokenStream extends BaseTokenStreamTestCase { - - static final long lvalue = 4573245871874382L; - static final int ivalue = 123456; - - public void testLongStream() throws Exception { - final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); - final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); - for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testIntStream() throws Exception { - final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue); - // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); - final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); - for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { - assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); - } - assertFalse("No more tokens available", stream.incrementToken()); - } - - public void testNotInitialized() throws Exception { - final NumericTokenStream stream=new NumericTokenStream(); - - try { - stream.reset(); - fail("reset() should not succeed."); - } catch (IllegalStateException e) { - // pass - } - - try { - stream.incrementToken(); - fail("incrementToken() should not succeed."); - } catch (IllegalStateException e) { - // pass - } - } - -} Index: backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 930707) +++ backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -230,6 +230,8 @@ testRightOpenRange(2); } + /* TESTs disabled, because incompatible API change in 3.1/flex: + private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="field"+precisionStep; @@ -298,6 +300,8 @@ testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE); } + */ + private void testRangeSplit(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="ascfield"+precisionStep; Index: backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 930707) +++ backwards/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -245,6 +245,8 @@ testRightOpenRange(2); } + /* TESTs disabled, because incompatible API change in 3.1/flex: + private void testRandomTrieAndClassicRangeQuery(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="field"+precisionStep; @@ -317,6 +319,8 @@ testRandomTrieAndClassicRangeQuery(Integer.MAX_VALUE); } + */ + private void testRangeSplit(int precisionStep) throws Exception { final Random rnd=newRandom(); String field="ascfield"+precisionStep; Index: backwards/src/test/org/apache/lucene/util/TestNumericUtils.java =================================================================== --- backwards/src/test/org/apache/lucene/util/TestNumericUtils.java (revision 930707) +++ backwards/src/test/org/apache/lucene/util/TestNumericUtils.java (working copy) @@ -26,6 +26,8 @@ public class TestNumericUtils extends LuceneTestCase { + /* TESTs disabled, because incompatible API change in 3.1/flex: + public void testLongConversionAndOrdering() throws Exception { // generate a series of encoded longs, each numerical one bigger than the one before String last=null; @@ -131,6 +133,8 @@ } } } + + */ public void testDoubles() throws Exception { double[] vals=new double[]{ Index: CHANGES.txt =================================================================== --- CHANGES.txt (revision 930707) +++ CHANGES.txt (working copy) @@ -4,7 +4,7 @@ Changes in backwards compatibility policy -* LUCENE-1458, LUCENE-2111: Changes from flexible indexing: +* LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing: - MultiReader ctor now throws IOException @@ -28,6 +28,23 @@ if you could fix your parser to instead operate directly on the byte[] in the BytesRef. + - The internal (experimental) API of NumericUtils changed completely + from String to BytesRef. 
Client code should never use this class, + so the change would normally not affect you. If you used some of + the methods to inspect terms or create TermQueries out of + prefix encoded terms, change to use BytesRef. Please note: + Do not use TermQueries to search for single numeric terms. + The recommended way is to create a corresponding NumericRangeQuery + with upper and lower bound equal and included. TermQueries do not + score correct, so the constant score mode of NRQ is the only + correct way to handle single value queries. + + - NumericTokenStream now works directly on byte[] terms. If you + plug a TokenFilter on top of this stream, you will likely get + an IllegalArgumentException, because the NTS does not support + TermAttribute/CharTermAttribute. If you want to further filter + or attach Payloads to NTS, use the new NumericTermAttribute. + Bug Fixes * LUCENE-2222: FixedIntBlockIndexInput incorrectly read one block of Index: contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java =================================================================== --- contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (revision 930707) +++ contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (working copy) @@ -19,12 +19,15 @@ import java.io.IOException; import java.util.List; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Filter; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.OpenBitSet; /** @@ -44,22 +47,41 @@ @Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); - final TermDocs termDocs = reader.termDocs(); + final Bits delDocs = MultiFields.getDeletedDocs(reader); final List area = shape.getArea(); - int sz = area.size(); + final int sz = area.size(); - final Term term = new Term(fieldName); // iterate through each boxid - for (int i =0; i< sz; i++) { - double boxId = area.get(i).doubleValue(); - termDocs.seek(term.createTerm(NumericUtils.doubleToPrefixCoded(boxId))); - // iterate through all documents - // which have this boxId - while (termDocs.next()) { - bits.fastSet(termDocs.doc()); + final BytesRef bytesRef = new BytesRef(NumericUtils.BUF_SIZE_LONG); + if (sz == 1) { + double boxId = area.get(0).doubleValue(); + NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() throws IOException { + return MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef); + } + + @Override + public boolean isCacheable() { + return false; + } + }; + } else { + final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); + for (int i =0; i< sz; i++) { + double boxId = area.get(i).doubleValue(); + NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); + final DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, delDocs, fieldName, bytesRef); + if (docsEnum == null) continue; + // iterate through all documents + // which have this boxId + int doc; + while ((doc = docsEnum.nextDoc()) != 
DocIdSetIterator.NO_MORE_DOCS) { + bits.fastSet(doc); + } } + return bits; } - return bits; } } Index: contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java =================================================================== --- contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (revision 930707) +++ contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (working copy) @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; @@ -49,7 +50,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; public class TestCartesian extends LuceneTestCase { @@ -96,8 +96,8 @@ doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED)); // convert the lat / long to lucene fields - doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat)); + doc.add(new NumericField(lngField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lng)); // add a default meta field to make searching all documents easy doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED)); @@ -105,10 +105,9 @@ int ctpsize = ctps.size(); for (int i =0; i < ctpsize; i++){ CartesianTierPlotter ctp = ctps.get(i); - doc.add(new Field(ctp.getTierFieldName(), - NumericUtils.doubleToPrefixCoded(ctp.getTierBoxId(lat,lng)), + doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE, Field.Store.YES, - Field.Index.NOT_ANALYZED_NO_NORMS)); + true).setDoubleValue(ctp.getTierBoxId(lat,lng))); doc.add(new Field(geoHashPrefix, GeoHashUtils.encode(lat,lng), Field.Store.YES, @@ -245,8 +244,8 @@ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -339,8 +338,8 @@ for(int i =0 ; i < results; i++){ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -434,8 +433,8 @@ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double 
geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); @@ -528,8 +527,8 @@ Document d = searcher.doc(scoreDocs[i].doc); String name = d.get("name"); - double rsLat = NumericUtils.prefixCodedToDouble(d.get(latField)); - double rsLng = NumericUtils.prefixCodedToDouble(d.get(lngField)); + double rsLat = Double.parseDouble(d.get(latField)); + double rsLng = Double.parseDouble(d.get(lngField)); Double geo_distance = distances.get(scoreDocs[i].doc); double distance = DistanceUtils.getInstance().getDistanceMi(lat, lng, rsLat, rsLng); Index: contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java =================================================================== --- contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (revision 930707) +++ contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; @@ -28,7 +29,6 @@ import org.apache.lucene.search.QueryWrapperFilter; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NumericUtils; import org.apache.lucene.store.RAMDirectory; public class TestDistance extends LuceneTestCase { @@ -63,8 +63,8 @@ doc.add(new Field("name", name,Field.Store.YES, Field.Index.ANALYZED)); // convert the lat / long to lucene fields - doc.add(new Field(latField, NumericUtils.doubleToPrefixCoded(lat),Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field(lngField, NumericUtils.doubleToPrefixCoded(lng),Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat)); + doc.add(new NumericField(lngField, Integer.MAX_VALUE,Field.Store.YES, true).setDoubleValue(lng)); // add a default meta field to make searching all documents easy doc.add(new Field("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED)); Index: src/java/org/apache/lucene/analysis/NumericTokenStream.java =================================================================== --- src/java/org/apache/lucene/analysis/NumericTokenStream.java (revision 930707) +++ src/java/org/apache/lucene/analysis/NumericTokenStream.java (working copy) @@ -17,12 +17,17 @@ * limitations under the License. */ +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -91,7 +96,89 @@ /** The lower precision tokens gets this token type assigned. 
*/ public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric"; + + /** Expert: Use this attribute to get the details of the currently generated token + * @lucene.experimental + * @since 3.1 + */ + public interface NumericTermAttribute extends Attribute { + /** Returns current shift value, undefined before first token */ + int getShift(); + /** Returns {@link NumericTokenStream}'s raw value as {@code long} */ + long getRawValue(); + /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */ + int getValueSize(); + } + + private static final class NumericAttributeFactory extends AttributeFactory { + private final AttributeFactory delegate; + private NumericTokenStream ts = null; + NumericAttributeFactory(AttributeFactory delegate) { + this.delegate = delegate; + } + + @Override + public AttributeImpl createAttributeInstance(Class attClass) { + if (attClass == NumericTermAttribute.class) + return new NumericTermAttributeImpl(ts); + if (attClass.isAssignableFrom(CharTermAttribute.class) || attClass.isAssignableFrom(TermAttribute.class)) + throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute/TermAttribute."); + return delegate.createAttributeInstance(attClass); + } + } + + private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute { + private final NumericTokenStream ts; + + public NumericTermAttributeImpl(NumericTokenStream ts) { + this.ts = ts; + } + + public int toBytesRef(BytesRef bytes) { + try { + assert ts.valSize == 64 || ts.valSize == 32; + return (ts.valSize == 64) ? + NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) : + NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes); + } catch (IllegalArgumentException iae) { + // return empty token before first + bytes.length = 0; + return 0; + } + } + + public int getShift() { return ts.shift; } + public long getRawValue() { return ts.value; } + public int getValueSize() { return ts.valSize; } + + @Override + public void clear() { + // this attribute has no contents to clear + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public int hashCode() { + return System.identityHashCode(this); + } + + @Override + public void copyTo(AttributeImpl target) { + // this attribute has no contents to copy + } + + @Override + public Object clone() { + // cannot throw CloneNotSupportedException (checked) + throw new UnsupportedOperationException(); + } + } + /** * Creates a token stream for numeric values using the default precisionStep * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, @@ -107,23 +194,15 @@ * before using set a value using the various set???Value() methods. */ public NumericTokenStream(final int precisionStep) { - super(); - this.precisionStep = precisionStep; - if (precisionStep < 1) - throw new IllegalArgumentException("precisionStep must be >=1"); - } + super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); - /** - * Expert: Creates a token stream for numeric values with the specified - * precisionStep using the given {@link AttributeSource}. - * The stream is not yet initialized, - * before using set a value using the various set???Value() methods. 
- */ - public NumericTokenStream(AttributeSource source, final int precisionStep) { - super(source); this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -134,10 +213,15 @@ * before using set a value using the various set???Value() methods. */ public NumericTokenStream(AttributeFactory factory, final int precisionStep) { - super(factory); + super(new NumericAttributeFactory(factory)); + // we must do this after the super call :( + ((NumericAttributeFactory) getAttributeFactory()).ts = this; + addAttribute(NumericTermAttribute.class); + this.precisionStep = precisionStep; if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); + shift = -precisionStep; } /** @@ -149,7 +233,7 @@ public NumericTokenStream setLongValue(final long value) { this.value = value; valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -162,7 +246,7 @@ public NumericTokenStream setIntValue(final int value) { this.value = value; valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -175,7 +259,7 @@ public NumericTokenStream setDoubleValue(final double value) { this.value = NumericUtils.doubleToSortableLong(value); valSize = 64; - shift = 0; + shift = -precisionStep; return this; } @@ -188,7 +272,7 @@ public NumericTokenStream setFloatValue(final float value) { this.value = NumericUtils.floatToSortableInt(value); valSize = 32; - shift = 0; + shift = -precisionStep; return this; } @@ -196,37 +280,24 @@ public void reset() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - shift = 0; + shift = -precisionStep; } @Override public boolean incrementToken() { if (valSize == 0) throw new IllegalStateException("call set???Value() before usage"); - if (shift >= valSize) + shift += precisionStep; + if (shift >= valSize) { + // reset so the attribute still works after exhausted stream + shift -= precisionStep; return false; + } clearAttributes(); - final char[] buffer; - switch (valSize) { - case 64: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG); - termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer)); - break; - - case 32: - buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT); - termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer)); - break; - - default: - // should not happen - throw new IllegalArgumentException("valSize must be 32 or 64"); - } - + // the TermToBytesRefAttribute is directly accessing shift & value. typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC); posIncrAtt.setPositionIncrement((shift == 0) ? 
1 : 0); - shift += precisionStep; return true; } @@ -238,12 +309,11 @@ } // members - private final TermAttribute termAtt = addAttribute(TermAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); - private int shift = 0, valSize = 0; // valSize==0 means not initialized + int shift, valSize = 0; // valSize==0 means not initialized private final int precisionStep; - private long value = 0L; + long value = 0L; } Index: src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 930707) +++ src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -379,9 +379,9 @@ */ private final class NumericRangeTermsEnum extends FilteredTermsEnum { - private final BytesRef currentLowerBound = new BytesRef(), currentUpperBound = new BytesRef(); + private BytesRef currentLowerBound, currentUpperBound; - private final LinkedList rangeBounds = new LinkedList(); + private final LinkedList rangeBounds = new LinkedList(); private final Comparator termComp; NumericRangeTermsEnum(final IndexReader reader) throws IOException { @@ -414,7 +414,7 @@ NumericUtils.splitLongRange(new NumericUtils.LongRangeBuilder() { @Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } @@ -449,7 +449,7 @@ NumericUtils.splitIntRange(new NumericUtils.IntRangeBuilder() { @Override - public final void addRange(String minPrefixCoded, String maxPrefixCoded) { + public final void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } @@ -470,11 +470,11 @@ if (rangeBounds.size() >= 2) { assert rangeBounds.size() % 2 == 0; - this.currentLowerBound.copy(rangeBounds.removeFirst()); - assert termComp.compare(currentUpperBound, currentLowerBound) <= 0 : + this.currentLowerBound = rangeBounds.removeFirst(); + assert currentUpperBound == null || termComp.compare(currentUpperBound, currentLowerBound) <= 0 : "The current upper bound must be <= the new lower bound"; - this.currentUpperBound.copy(rangeBounds.removeFirst()); + this.currentUpperBound = rangeBounds.removeFirst(); return currentLowerBound; } Index: src/java/org/apache/lucene/util/BytesRef.java =================================================================== --- src/java/org/apache/lucene/util/BytesRef.java (revision 930707) +++ src/java/org/apache/lucene/util/BytesRef.java (working copy) @@ -198,7 +198,7 @@ return utf8SortedAsUTF16SortOrder; } - public static class UTF8SortedAsUTF16Comparator implements Comparator { + private static class UTF8SortedAsUTF16Comparator implements Comparator { // Only singleton private UTF8SortedAsUTF16Comparator() {}; Index: src/java/org/apache/lucene/util/NumericUtils.java =================================================================== --- src/java/org/apache/lucene/util/NumericUtils.java (revision 930707) +++ src/java/org/apache/lucene/util/NumericUtils.java (working copy) @@ -22,6 +22,8 @@ import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs +// TODO: Remove the commented out methods before release! 
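Illustrative note (not part of the patch): the NumericUtils hunks below replace the old char[]/String prefix coding with BytesRef. A minimal sketch of the new round trip, assuming only the signatures introduced here (longToPrefixCoded(long, int, BytesRef), prefixCodedToLong(BytesRef), BUF_SIZE_LONG); the class name is made up for illustration.

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.NumericUtils;

    public class PrefixCodedRoundTrip {
      public static void main(String[] args) {
        // Reusable buffer, sized for the longest possible encoded long.
        final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        final long value = 4573245871874382L;
        // Encode at full precision (shift = 0); the int return value is the
        // term hash used while indexing and can be ignored here.
        NumericUtils.longToPrefixCoded(value, 0, bytes);
        // Decode the prefix-coded bytes back to the original value.
        final long decoded = NumericUtils.prefixCodedToLong(bytes);
        System.out.println("decoded=" + decoded + " matches=" + (decoded == value));
      }
    }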
+ /** * This is a helper class to generate prefix-encoded representations for numerical values * and supplies converters to represent float/double values as sortable integers/longs. @@ -32,10 +34,10 @@ * more exactly. This reduces the number of terms dramatically. * *

This class generates terms to achieve this: First the numerical integer values need to - * be converted to strings. For that integer values (32 bit or 64 bit) are made unsigned - * and the bits are converted to ASCII chars with each 7 bit. The resulting string is - * sortable like the original integer value. Each value is also prefixed - * (in the first char) by the shift value (number of bits removed) used + * be converted to bytes. For that integer values (32 bit or 64 bit) are made unsigned + * and the bits are converted to ASCII chars with each 7 bit. The resulting byte[] is + * sortable like the original integer value (even using UTF-8 sort order). Each value is also + * prefixed (in the first char) by the shift value (number of bits removed) used * during encoding. * *

To also index floating point numbers, this class supplies two methods to convert them @@ -51,13 +53,13 @@ * {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query part * for the same data types. * - *

This class can also be used, to generate lexicographically sortable (according - * {@link String#compareTo(String)}) representations of numeric data types for other - * usages (e.g. sorting). + *

This class can also be used, to generate lexicographically sortable (according to + * {@link BytesRef#getUTF8SortedAsUTF16Comparator()}) representations of numeric data + * types for other usages (e.g. sorting). * * @lucene.experimental * - * @since 2.9 + * @since 2.9, API changed non backwards-compliant in 3.1 */ public final class NumericUtils { @@ -71,27 +73,27 @@ /** * Expert: Longs are stored at lower precision by shifting off lower bits. The shift count is - * stored as SHIFT_START_LONG+shift in the first character + * stored as SHIFT_START_LONG+shift in the first byte */ - public static final char SHIFT_START_LONG = (char)0x20; + public static final byte SHIFT_START_LONG = 0x20; /** - * Expert: The maximum term length (used for char[] buffer size) + * Expert: The maximum term length (used for byte[] buffer size) * for encoding long values. - * @see #longToPrefixCoded(long,int,char[]) + * @see #longToPrefixCoded(long,int,BytesRef) */ public static final int BUF_SIZE_LONG = 63/7 + 2; /** * Expert: Integers are stored at lower precision by shifting off lower bits. The shift count is - * stored as SHIFT_START_INT+shift in the first character + * stored as SHIFT_START_INT+shift in the first byte */ - public static final char SHIFT_START_INT = (char)0x60; + public static final byte SHIFT_START_INT = 0x60; /** - * Expert: The maximum term length (used for char[] buffer size) + * Expert: The maximum term length (used for byte[] buffer size) * for encoding int values. - * @see #intToPrefixCoded(int,int,char[]) + * @see #intToPrefixCoded(int,int,BytesRef) */ public static final int BUF_SIZE_INT = 31/7 + 2; @@ -100,25 +102,33 @@ * This is method is used by {@link NumericTokenStream}. * @param val the numeric value * @param shift how many bits to strip from the right - * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} - * length - * @return number of chars written to buffer + * @param bytes will contain the encoded value + * @return the hash code for indexing (TermsHash) */ - public static int longToPrefixCoded(final long val, final int shift, final char[] buffer) { + public static int longToPrefixCoded(final long val, final int shift, final BytesRef bytes) { if (shift>63 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..63"); - int nChars = (63-shift)/7 + 1, len = nChars+1; - buffer[0] = (char)(SHIFT_START_LONG + shift); + if (bytes.bytes == null) { + bytes.bytes = new byte[NumericUtils.BUF_SIZE_LONG]; + } else if (bytes.bytes.length < NumericUtils.BUF_SIZE_LONG) { + bytes.grow(NumericUtils.BUF_SIZE_LONG); + } + int nChars = (63-shift)/7 + 1; + bytes.length = nChars+1; + int hash = (bytes.bytes[0] = (byte) (SHIFT_START_LONG + shift)); long sortableBits = val ^ 0x8000000000000000L; sortableBits >>>= shift; while (nChars>=1) { - // Store 7 bits per character for good efficiency when UTF-8 encoding. - // The whole number is right-justified so that lucene can prefix-encode - // the terms more efficiently. - buffer[nChars--] = (char)(sortableBits & 0x7f); + // Store 7 bits per byte for compatibility + // with UTF-8 encoding of terms + bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - return len; + // TODO: optimize this to do it in above loop + for (int i = 1; i < bytes.length; i++) { + hash = 31*hash + bytes.bytes[i]; + } + return hash; } /** @@ -126,47 +136,59 @@ * This is method is used by {@link LongRangeBuilder}. 
* @param val the numeric value * @param shift how many bits to strip from the right - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static String longToPrefixCoded(final long val, final int shift) { - final char[] buffer = new char[BUF_SIZE_LONG]; - final int len = longToPrefixCoded(val, shift, buffer); - return new String(buffer, 0, len); - } + final BytesRef buffer = new BytesRef(BUF_SIZE_LONG); + longToPrefixCoded(val, shift, buffer); + return buffer.utf8ToString(); + }*/ /** * This is a convenience method, that returns prefix coded bits of a long without * reducing the precision. It can be used to store the full precision value as a * stored field in index. *

To decode, use {@link #prefixCodedToLong}. - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static String longToPrefixCoded(final long val) { return longToPrefixCoded(val, 0); - } + }*/ /** * Expert: Returns prefix coded bits after reducing the precision by shift bits. * This is method is used by {@link NumericTokenStream}. * @param val the numeric value * @param shift how many bits to strip from the right - * @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} - * length - * @return number of chars written to buffer + * @param bytes will contain the encoded value + * @return the hash code for indexing (TermsHash) */ - public static int intToPrefixCoded(final int val, final int shift, final char[] buffer) { + public static int intToPrefixCoded(final int val, final int shift, final BytesRef bytes) { if (shift>31 || shift<0) throw new IllegalArgumentException("Illegal shift value, must be 0..31"); - int nChars = (31-shift)/7 + 1, len = nChars+1; - buffer[0] = (char)(SHIFT_START_INT + shift); + if (bytes.bytes == null) { + bytes.bytes = new byte[NumericUtils.BUF_SIZE_INT]; + } else if (bytes.bytes.length < NumericUtils.BUF_SIZE_INT) { + bytes.grow(NumericUtils.BUF_SIZE_INT); + } + int nChars = (31-shift)/7 + 1; + bytes.length = nChars+1; + int hash = (bytes.bytes[0] = (byte)(SHIFT_START_INT + shift)); int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars>=1) { - // Store 7 bits per character for good efficiency when UTF-8 encoding. - // The whole number is right-justified so that lucene can prefix-encode - // the terms more efficiently. - buffer[nChars--] = (char)(sortableBits & 0x7f); + // Store 7 bits per byte for compatibility + // with UTF-8 encoding of terms + bytes.bytes[nChars--] = (byte)(sortableBits & 0x7f); sortableBits >>>= 7; } - return len; + // TODO: optimize this to do it in above loop + for (int i = 1; i < bytes.length; i++) { + hash = 31*hash + bytes.bytes[i]; + } + return hash; } /** @@ -174,22 +196,26 @@ * This is method is used by {@link IntRangeBuilder}. * @param val the numeric value * @param shift how many bits to strip from the right - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static String intToPrefixCoded(final int val, final int shift) { - final char[] buffer = new char[BUF_SIZE_INT]; - final int len = intToPrefixCoded(val, shift, buffer); - return new String(buffer, 0, len); - } + final BytesRef buffer = new BytesRef(BUF_SIZE_INT); + intToPrefixCoded(val, shift, buffer); + return buffer.utf8ToString(); + }*/ /** * This is a convenience method, that returns prefix coded bits of an int without * reducing the precision. It can be used to store the full precision value as a * stored field in index. *

To decode, use {@link #prefixCodedToInt}. - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static String intToPrefixCoded(final int val) { return intToPrefixCoded(val, 0); - } + }*/ /** * Returns a long from prefixCoded characters. @@ -198,42 +224,36 @@ * @throws NumberFormatException if the supplied string is * not correctly prefix encoded. * @see #longToPrefixCoded(long) - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static long prefixCodedToLong(final String prefixCoded) { - final int shift = prefixCoded.charAt(0)-SHIFT_START_LONG; - if (shift>63 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really a LONG?)"); - long sortableBits = 0L; - for (int i=1, len=prefixCoded.length(); i0x7f) { - throw new NumberFormatException( - "Invalid prefixCoded numerical value representation (char "+ - Integer.toHexString(ch)+" at position "+i+" is invalid)" - ); - } - sortableBits |= ch; - } - return (sortableBits << shift) ^ 0x8000000000000000L; - } + return prefixCodedToLong(new BytesRef(prefixCoded)); + }*/ + /** + * Returns a long from prefixCoded bytes. + * Rightmost bits will be zero for lower precision codes. + * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied {@link BytesRef} is + * not correctly prefix encoded. + * @see #longToPrefixCoded(long) + */ public static long prefixCodedToLong(final BytesRef term) { final int shift = term.bytes[term.offset]-SHIFT_START_LONG; if (shift>63 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)"); long sortableBits = 0L; - final int limit = term.offset + term.length; - for (int i=term.offset+1; i31 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); - int sortableBits = 0; - for (int i=1, len=prefixCoded.length(); i0x7f) { - throw new NumberFormatException( - "Invalid prefixCoded numerical value representation (char "+ - Integer.toHexString(ch)+" at position "+i+" is invalid)" - ); - } - sortableBits |= ch; - } - return (sortableBits << shift) ^ 0x80000000; - } + return prefixCodedToInt(new BytesRef(prefixCoded)); + }*/ + /** + * Returns an int from prefixCoded bytes. + * Rightmost bits will be zero for lower precision codes. + * This method can be used to decode e.g. a stored field. + * @throws NumberFormatException if the supplied {@link BytesRef} is + * not correctly prefix encoded. + * @see #intToPrefixCoded(int) + */ public static int prefixCodedToInt(final BytesRef term) { final int shift = term.bytes[term.offset]-SHIFT_START_INT; if (shift>31 || shift<0) - throw new NumberFormatException("Invalid shift value in prefixCoded string (is encoded value really an INT?)"); + throw new NumberFormatException("Invalid shift value in prefixCoded bytes (is encoded value really an INT?)"); int sortableBits = 0; - final int limit = term.offset + term.length; - for (int i=term.offset+1; ilong back to a double. @@ -318,10 +334,12 @@ /** * Convenience method: this just returns: * sortableLongToDouble(prefixCodedToLong(val)) - */ + * @deprecated This method is no longer needed! 
+ * + @Deprecated public static double prefixCodedToDouble(String val) { return sortableLongToDouble(prefixCodedToLong(val)); - } + }*/ /** * Converts a float value to a sortable signed int. @@ -339,10 +357,12 @@ /** * Convenience method: this just returns: * intToPrefixCoded(floatToSortableInt(val)) - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static String floatToPrefixCoded(float val) { return intToPrefixCoded(floatToSortableInt(val)); - } + }*/ /** * Converts a sortable int back to a float. @@ -356,16 +376,18 @@ /** * Convenience method: this just returns: * sortableIntToFloat(prefixCodedToInt(val)) - */ + * @deprecated This method is no longer needed! + * + @Deprecated public static float prefixCodedToFloat(String val) { return sortableIntToFloat(prefixCodedToInt(val)); - } + }*/ /** * Expert: Splits a long range recursively. * You may implement a builder that adds clauses to a * {@link org.apache.lucene.search.BooleanQuery} for each call to its - * {@link LongRangeBuilder#addRange(String,String)} + * {@link LongRangeBuilder#addRange(BytesRef,BytesRef)} * method. *

This method is used by {@link NumericRangeQuery}. */ @@ -379,7 +401,7 @@ * Expert: Splits an int range recursively. * You may implement a builder that adds clauses to a * {@link org.apache.lucene.search.BooleanQuery} for each call to its - * {@link IntRangeBuilder#addRange(String,String)} + * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)} * method. *

This method is used by {@link NumericRangeQuery}. */ @@ -454,8 +476,9 @@ /** * Expert: Callback for {@link #splitLongRange}. * You need to overwrite only one of the methods. - *

NOTE: This is a very low-level interface, - * the method signatures may change in later versions. + * @lucene.experimental NOTE: This is a very low-level interface, + * the method signatures may change in later versions. + * @since 2.9, API changed non backwards-compliant in 3.1 */ public static abstract class LongRangeBuilder { @@ -463,7 +486,7 @@ * Overwrite this method, if you like to receive the already prefix encoded range bounds. * You can directly build classical (inclusive) range queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded) { + public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -472,7 +495,10 @@ * You can use this for e.g. debugging purposes (print out range bounds). */ public void addRange(final long min, final long max, final int shift) { - addRange(longToPrefixCoded(min, shift), longToPrefixCoded(max, shift)); + final BytesRef minBytes = new BytesRef(BUF_SIZE_LONG), maxBytes = new BytesRef(BUF_SIZE_LONG); + longToPrefixCoded(min, shift, minBytes); + longToPrefixCoded(max, shift, maxBytes); + addRange(minBytes, maxBytes); } } @@ -480,8 +506,9 @@ /** * Expert: Callback for {@link #splitIntRange}. * You need to overwrite only one of the methods. - *

NOTE: This is a very low-level interface, - * the method signatures may change in later versions. + * @lucene.experimental NOTE: This is a very low-level interface, + * the method signatures may change in later versions. + * @since 2.9, API changed non backwards-compliant in 3.1 */ public static abstract class IntRangeBuilder { @@ -489,7 +516,7 @@ * Overwrite this method, if you like to receive the already prefix encoded range bounds. * You can directly build classical range (inclusive) queries from them. */ - public void addRange(String minPrefixCoded, String maxPrefixCoded) { + public void addRange(BytesRef minPrefixCoded, BytesRef maxPrefixCoded) { throw new UnsupportedOperationException(); } @@ -498,7 +525,10 @@ * You can use this for e.g. debugging purposes (print out range bounds). */ public void addRange(final int min, final int max, final int shift) { - addRange(intToPrefixCoded(min, shift), intToPrefixCoded(max, shift)); + final BytesRef minBytes = new BytesRef(BUF_SIZE_INT), maxBytes = new BytesRef(BUF_SIZE_INT); + intToPrefixCoded(min, shift, minBytes); + intToPrefixCoded(max, shift, maxBytes); + addRange(minBytes, maxBytes); } } Index: src/test/org/apache/lucene/analysis/TestNumericTokenStream.java =================================================================== --- src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (revision 930707) +++ src/test/org/apache/lucene/analysis/TestNumericTokenStream.java (working copy) @@ -17,8 +17,9 @@ * limitations under the License. */ +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TestNumericTokenStream extends BaseTokenStreamTestCase { @@ -29,27 +30,47 @@ public void testLongStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue); // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class); final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); + final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); + final BytesRef bytes = new BytesRef(); + stream.reset(); + assertEquals(64, numericAtt.getValueSize()); + assertEquals(lvalue, numericAtt.getRawValue()); for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.longToPrefixCoded(lvalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); + assertEquals("Shift value wrong", shift, numericAtt.getShift()); + final int hash = bytesAtt.toBytesRef(bytes); + assertEquals("Hash incorrect", bytes.hashCode(), hash); + assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes)); + assertEquals("Type incorrect", (shift == 0) ? 
NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } - assertFalse("No more tokens available", stream.incrementToken()); + assertFalse("More tokens available", stream.incrementToken()); + stream.end(); + stream.close(); } public void testIntStream() throws Exception { final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be throwed - final TermAttribute termAtt = stream.getAttribute(TermAttribute.class); + final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class); final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class); + final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class); + final BytesRef bytes = new BytesRef(); + stream.reset(); + assertEquals(32, numericAtt.getValueSize()); + assertEquals(ivalue, numericAtt.getRawValue()); for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) { assertTrue("New token is available", stream.incrementToken()); - assertEquals("Term is correctly encoded", NumericUtils.intToPrefixCoded(ivalue, shift), termAtt.term()); - assertEquals("Type correct", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); + assertEquals("Shift value wrong", shift, numericAtt.getShift()); + final int hash = bytesAtt.toBytesRef(bytes); + assertEquals("Hash incorrect", bytes.hashCode(), hash); + assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes)); + assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type()); } - assertFalse("No more tokens available", stream.incrementToken()); + assertFalse("More tokens available", stream.incrementToken()); + stream.end(); + stream.close(); } public void testNotInitialized() throws Exception { Index: src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 930707) +++ src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -24,11 +24,11 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.IndexWriter.MaxFieldLength; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCaseJ4; import org.apache.lucene.util.NumericUtils; @@ -333,9 +333,15 @@ if (lower>upper) { int a=lower; lower=upper; upper=a; } + final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_INT), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_INT); + NumericUtils.intToPrefixCoded(lower, 0, lowerBytes); + NumericUtils.intToPrefixCoded(upper, 0, upperBytes); + // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! 
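Related illustration (not part of the patch; it restates the guidance added to CHANGES.txt earlier in this patch): single numeric values should be looked up with a NumericRangeQuery whose lower and upper bounds are equal and inclusive, not with a TermQuery on the prefix-coded term. A minimal sketch using the same newIntRange() factory exercised in this test; the helper class is hypothetical.

    import org.apache.lucene.search.NumericRangeQuery;
    import org.apache.lucene.search.Query;

    public class SingleValueNumericQuery {
      /** Single-value lookup: equal bounds, both ends inclusive. */
      public static Query forInt(String field, int precisionStep, int value) {
        return NumericRangeQuery.newIntRange(field, precisionStep, value, value, true, true);
      }
    }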
+ final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); + // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -343,7 +349,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, false); + cq=new TermRangeQuery(field, lowerString, upperString, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -351,7 +357,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), false, true); + cq=new TermRangeQuery(field, lowerString, upperString, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -359,7 +365,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, NumericUtils.intToPrefixCoded(lower), NumericUtils.intToPrefixCoded(upper), true, false); + cq=new TermRangeQuery(field, lowerString, upperString, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -558,7 +564,7 @@ while (termEnum.next() != null) { final BytesRef t = termEnum.term(); if (t != null) { - final int val = NumericUtils.prefixCodedToInt(t.utf8ToString()); + final int val = NumericUtils.prefixCodedToInt(t); assertTrue("value not in bounds " + val + " >= " + lower + " && " + val + " <= " + upper, val >= lower && val <= upper); count++; Index: src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 930707) +++ src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCaseJ4; import org.apache.lucene.util.NumericUtils; @@ -350,9 +351,15 @@ if (lower>upper) { long a=lower; lower=upper; upper=a; } + final BytesRef lowerBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG), upperBytes = new BytesRef(NumericUtils.BUF_SIZE_LONG); + NumericUtils.longToPrefixCoded(lower, 0, 
lowerBytes); + NumericUtils.longToPrefixCoded(upper, 0, upperBytes); + // TODO: when new TermRange ctors with BytesRef available, use them and do not convert to string! + final String lowerString = lowerBytes.utf8ToString(), upperString = upperBytes.utf8ToString(); + // test inclusive range NumericRangeQuery tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true); - TermRangeQuery cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, true); + TermRangeQuery cq=new TermRangeQuery(field, lowerString, upperString, true, true); TopDocs tTopDocs = searcher.search(tq, 1); TopDocs cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -360,7 +367,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false); - cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, false); + cq=new TermRangeQuery(field, lowerString, upperString, false, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -368,7 +375,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test left exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true); - cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), false, true); + cq=new TermRangeQuery(field, lowerString, upperString, false, true); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); @@ -376,7 +383,7 @@ termCountC += cq.getTotalNumberOfTerms(); // test right exclusive range tq=NumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false); - cq=new TermRangeQuery(field, NumericUtils.longToPrefixCoded(lower), NumericUtils.longToPrefixCoded(upper), true, false); + cq=new TermRangeQuery(field, lowerString, upperString, true, false); tTopDocs = searcher.search(tq, 1); cTopDocs = searcher.search(cq, 1); assertEquals("Returned count for NumericRangeQuery and TermRangeQuery must be equal", cTopDocs.totalHits, tTopDocs.totalHits ); Index: src/test/org/apache/lucene/util/TestNumericUtils.java =================================================================== --- src/test/org/apache/lucene/util/TestNumericUtils.java (revision 930707) +++ src/test/org/apache/lucene/util/TestNumericUtils.java (working copy) @@ -25,33 +25,35 @@ public void testLongConversionAndOrdering() throws Exception { // generate a series of encoded longs, each numerical one bigger than the one before - String last=null; + BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_LONG); for (long l=-100000L; l<100000L; l++) { - String act=NumericUtils.longToPrefixCoded(l); + NumericUtils.longToPrefixCoded(l, 0, act); if (last!=null) { // test if smaller - assertTrue("actual bigger than last", last.compareTo(act) < 0 ); + assertTrue("actual bigger than last", BytesRef.getUTF8SortedAsUTF16Comparator().compare(last, act) < 0 ); } // test is back and forward conversion works assertEquals("forward and back conversion should generate same long", l, 
NumericUtils.prefixCodedToLong(act));
       // next step
-      last=act;
+      last = act;
+      act = new BytesRef(NumericUtils.BUF_SIZE_LONG);
     }
   }
   public void testIntConversionAndOrdering() throws Exception {
     // generate a series of encoded ints, each numerical one bigger than the one before
-    String last=null;
+    BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_INT);
     for (int i=-100000; i<100000; i++) {
-      String act=NumericUtils.intToPrefixCoded(i);
+      NumericUtils.intToPrefixCoded(i, 0, act);
       if (last!=null) {
         // test if smaller
-        assertTrue("actual bigger than last", last.compareTo(act) < 0 );
+        assertTrue("actual bigger than last", BytesRef.getUTF8SortedAsUTF16Comparator().compare(last, act) < 0 );
       }
       // test is back and forward conversion works
       assertEquals("forward and back conversion should generate same int", i, NumericUtils.prefixCodedToInt(act));
       // next step
       last=act;
+      act = new BytesRef(NumericUtils.BUF_SIZE_INT);
     }
   }
@@ -60,10 +62,11 @@
     Long.MIN_VALUE, Long.MIN_VALUE+1, Long.MIN_VALUE+2, -5003400000000L, -4000L, -3000L, -2000L, -1000L, -1L, 0L, 1L, 10L, 300L, 50006789999999999L, Long.MAX_VALUE-2, Long.MAX_VALUE-1, Long.MAX_VALUE };
-    String[] prefixVals=new String[vals.length];
+    BytesRef[] prefixVals=new BytesRef[vals.length];
     for (int i=0; i
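Closing illustration (not part of the patch): a minimal consumer of the reworked NumericTokenStream, using only the API exercised in the TestNumericTokenStream hunk above (NumericTermAttribute plus TermToBytesRefAttribute.toBytesRef(BytesRef)); the class name is made up for illustration.

    import org.apache.lucene.analysis.NumericTokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.NumericUtils;

    public class NumericTokenStreamConsumer {
      public static void dump(long value) throws Exception {
        final NumericTokenStream stream = new NumericTokenStream().setLongValue(value);
        final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        final NumericTokenStream.NumericTermAttribute numericAtt =
            stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
        final BytesRef bytes = new BytesRef(NumericUtils.BUF_SIZE_LONG);
        stream.reset();
        while (stream.incrementToken()) {
          // Fill the reusable BytesRef with the current prefix-coded term.
          bytesAtt.toBytesRef(bytes);
          // Each token has 'shift' low-order bits of the raw value stripped off.
          System.out.println("shift=" + numericAtt.getShift()
              + " decoded=" + NumericUtils.prefixCodedToLong(bytes));
        }
        stream.end();
        stream.close();
      }
    }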