Index: src/java/org/apache/lucene/util/NumericUtils.java =================================================================== --- src/java/org/apache/lucene/util/NumericUtils.java (revision 964951) +++ src/java/org/apache/lucene/util/NumericUtils.java (working copy) @@ -21,6 +21,7 @@ import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.search.NumericRangeQuery; // for javadocs import org.apache.lucene.search.NumericRangeFilter; // for javadocs +import java.math.BigInteger; // TODO: Remove the commented out methods before release! @@ -411,7 +412,7 @@ public static void splitLongRange(final LongRangeBuilder builder, final int precisionStep, final long minBound, final long maxBound ) { - splitRange(builder, 64, precisionStep, minBound, maxBound); + splitRange(builder, 64, precisionStep, BigInteger.valueOf(minBound), BigInteger.valueOf(maxBound)); } /** @@ -425,64 +426,77 @@ public static void splitIntRange(final IntRangeBuilder builder, final int precisionStep, final int minBound, final int maxBound ) { - splitRange(builder, 32, precisionStep, minBound, maxBound); + splitRange(builder, 32, precisionStep, BigInteger.valueOf(minBound), BigInteger.valueOf(maxBound)); } /** This helper does the splitting for both 32 and 64 bit. */ private static void splitRange( final Object builder, final int valSize, - final int precisionStep, long minBound, long maxBound + final int precisionStep, BigInteger minBound, BigInteger maxBound ) { if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); - if (minBound > maxBound) return; - for (int shift=0; ; shift += precisionStep) { + if (minBound.compareTo(maxBound) > 0) + return; + + int shift; + for (shift = 0; shift + precisionStep < valSize; shift += precisionStep) { // calculate new bounds for inner precision - final long diff = 1L << (shift+precisionStep), - mask = ((1L<=valSize || nextMinBound>nextMaxBound) { - // We are in the lowest precision or the next precision is not available. - addRange(builder, valSize, minBound, maxBound, shift); - // exit the split recursion loop + hasLower = !minBound.and(mask).equals(BigInteger.ZERO), + hasUpper = !maxBound.and(mask).equals(mask); + final BigInteger + nextMinBound = (hasLower ? minBound.add(diff) : minBound).andNot(mask), + nextMaxBound = (hasUpper ? maxBound.subtract(diff) : maxBound).andNot(mask); + + if (nextMinBound.compareTo(nextMaxBound) > 0) { + // The next precision is not available as our + // calculated bounds for the next iteration overlap each other break; } if (hasLower) - addRange(builder, valSize, minBound, minBound | mask, shift); + addRange(builder, valSize, minBound, minBound.or(mask), shift); if (hasUpper) - addRange(builder, valSize, maxBound & ~mask, maxBound, shift); + addRange(builder, valSize, maxBound.andNot(mask), maxBound, shift); // recurse to next precision minBound = nextMinBound; maxBound = nextMaxBound; } + + // add the last range component + addRange(builder, valSize, minBound, maxBound, shift); } + /** Pre-calculated (1 << n) tables with corrsponding masks */ + private static final BigInteger[] SHIFTS = new BigInteger[64], MASKS = new BigInteger[SHIFTS.length]; + static { + for (int i = 0; i < SHIFTS.length; i++) { + MASKS[i] = (SHIFTS[i] = BigInteger.ONE.shiftLeft(i)).subtract(BigInteger.ONE); + } + } + /** Helper that delegates to correct range builder */ private static void addRange( final Object builder, final int valSize, - long minBound, long maxBound, + BigInteger minBound, BigInteger maxBound, final int shift ) { // for the max bound set all lower bits (that were shifted away): // this is important for testing or other usages of the splitted range // (e.g. to reconstruct the full range). The prefixEncoding will remove // the bits anyway, so they do not hurt! - maxBound |= (1L << shift) - 1L; + maxBound = maxBound.or(MASKS[shift]); // delegate to correct range builder switch(valSize) { case 64: - ((LongRangeBuilder)builder).addRange(minBound, maxBound, shift); + ((LongRangeBuilder)builder).addRange(minBound.longValue(), maxBound.longValue(), shift); break; case 32: - ((IntRangeBuilder)builder).addRange((int)minBound, (int)maxBound, shift); + ((IntRangeBuilder)builder).addRange(minBound.intValue(), maxBound.intValue(), shift); break; default: // Should not happen! Index: src/test/org/apache/lucene/util/TestNumericUtils.java =================================================================== --- src/test/org/apache/lucene/util/TestNumericUtils.java (revision 964951) +++ src/test/org/apache/lucene/util/TestNumericUtils.java (working copy) @@ -20,9 +20,18 @@ import java.util.Arrays; import java.util.Collections; import java.util.Iterator; +import java.util.Random; public class TestNumericUtils extends LuceneTestCase { + private Random random; + + @Override + protected void setUp() throws Exception { + super.setUp(); + random = newRandom(); + } + public void testLongConversionAndOrdering() throws Exception { // generate a series of encoded longs, each numerical one bigger than the one before BytesRef last=null, act=new BytesRef(NumericUtils.BUF_SIZE_LONG); @@ -180,8 +189,8 @@ // INFO: Tests for trieCodeLong()/trieCodeInt() not needed because implicitely tested by range filter tests /** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */ - protected void assertLongRangeSplit(final long lower, final long upper, int precisionStep, - final boolean useBitSet, final Iterator neededBounds + private void assertLongRangeSplit(final long lower, final long upper, int precisionStep, + final boolean useBitSet, final Iterator neededBounds, final Iterator neededShifts ) throws Exception { final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null; @@ -191,11 +200,16 @@ assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper); if (useBitSet) for (long l=min; l<=max; l++) { assertFalse("ranges should not overlap", bits.getAndSet(l-lower) ); + // extra exit condition to prevent overflow on MAX_VALUE + if (l == max) break; } + if (neededBounds == null || neededShifts == null) + return; // make unsigned longs for easier display and understanding min ^= 0x8000000000000000L; max ^= 0x8000000000000000L; - //System.out.println("Long.valueOf(0x"+Long.toHexString(min>>>shift)+"L),Long.valueOf(0x"+Long.toHexString(max>>>shift)+"L),"); + //System.out.println("Long.valueOf(0x"+Long.toHexString(min>>>shift)+"L),Long.valueOf(0x"+Long.toHexString(max>>>shift)+"L)/*shift="+shift+"*/,"); + assertEquals( "shift", neededShifts.next().intValue(), shift); assertEquals( "inner min bound", neededBounds.next().longValue(), min>>>shift); assertEquals( "inner max bound", neededBounds.next().longValue(), max>>>shift); } @@ -208,6 +222,138 @@ } } + /** LUCENE-2541: NumericRangeQuery errors with endpoints near long min and max values */ + public void testLongExtremeValues() throws Exception { + // upper end extremes + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 1, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 2, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 6, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 8, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE, Long.MAX_VALUE, 64, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffffL),Long.valueOf(0xffffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + + assertLongRangeSplit(Long.MAX_VALUE-0xfL, Long.MAX_VALUE, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0xfffffffffffffffL),Long.valueOf(0xfffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(4) + }).iterator()); + assertLongRangeSplit(Long.MAX_VALUE-0x10L, Long.MAX_VALUE, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0xffffffffffffffefL),Long.valueOf(0xffffffffffffffefL), + Long.valueOf(0xfffffffffffffffL),Long.valueOf(0xfffffffffffffffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0), Integer.valueOf(4), + }).iterator()); + + // lower end extremes + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 1, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 2, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 6, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 8, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE, 64, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000000L),Long.valueOf(0x0000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) + }).iterator()); + + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE+0xfL, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0x000000000000000L),Long.valueOf(0x000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(4) + }).iterator()); + assertLongRangeSplit(Long.MIN_VALUE, Long.MIN_VALUE+0x10L, 4, true, Arrays.asList(new Long[]{ + Long.valueOf(0x0000000000000010L),Long.valueOf(0x0000000000000010L), + Long.valueOf(0x000000000000000L),Long.valueOf(0x000000000000000L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0), Integer.valueOf(4), + }).iterator()); + } + + public void testRandom() throws Exception { + long num = 100L * _TestUtil.getRandomMultiplier(); + for (long i=0; i < num; i++) { + executeOneRandomSplit(); + } + } + + private void executeOneRandomSplit() throws Exception { + long lower = randomLong(); + long len = (long) random.nextInt(16384*1024); // not too large bitsets, else OOME! + while (lower + len < lower) { // overflow + lower >>= 1; + } + assertLongRangeSplit(lower, lower + len, random.nextInt(63) + 1, true, null, null); + } + + private long randomLong() { + long val; + switch(random.nextInt(4)) { + case 0: + val = 1L << (random.nextInt(63)); // patterns like 0x000000100000 (-1 yields patterns like 0x0000fff) + break; + case 1: + val = -1L << (random.nextInt(63)); // patterns like 0xfffff00000 + break; + default: + val = random.nextLong(); + } + + val += random.nextInt(5)-2; + + if (random.nextBoolean()) { + if (random.nextBoolean()) val += random.nextInt(100)-50; + if (random.nextBoolean()) val = ~val; + if (random.nextBoolean()) val = val<<1; + if (random.nextBoolean()) val = val>>>1; + } + + return val; + } + public void testSplitLongRange() throws Exception { // a hard-coded "standard" range assertLongRangeSplit(-5000L, 9500L, 4, true, Arrays.asList(new Long[]{ @@ -218,11 +364,18 @@ Long.valueOf(0x7fffffffffffedL), Long.valueOf(0x7fffffffffffefL), Long.valueOf(0x80000000000020L), Long.valueOf(0x80000000000024L), Long.valueOf(0x7ffffffffffffL), Long.valueOf(0x8000000000001L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0), Integer.valueOf(0), + Integer.valueOf(4), Integer.valueOf(4), + Integer.valueOf(8), Integer.valueOf(8), + Integer.valueOf(12) }).iterator()); // the same with no range splitting assertLongRangeSplit(-5000L, 9500L, 64, true, Arrays.asList(new Long[]{ Long.valueOf(0x7fffffffffffec78L),Long.valueOf(0x800000000000251cL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) }).iterator()); // this tests optimized range splitting, if one of the inner bounds @@ -230,40 +383,52 @@ assertLongRangeSplit(0L, 1024L+63L, 4, true, Arrays.asList(new Long[]{ Long.valueOf(0x800000000000040L), Long.valueOf(0x800000000000043L), Long.valueOf(0x80000000000000L), Long.valueOf(0x80000000000003L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(4), Integer.valueOf(8) }).iterator()); // the full long range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-) assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 8, false, Arrays.asList(new Long[]{ Long.valueOf(0x00L),Long.valueOf(0xffL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(56) }).iterator()); // the same with precisionStep=4 assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 4, false, Arrays.asList(new Long[]{ Long.valueOf(0x0L),Long.valueOf(0xfL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(60) }).iterator()); // the same with precisionStep=2 assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 2, false, Arrays.asList(new Long[]{ Long.valueOf(0x0L),Long.valueOf(0x3L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(62) }).iterator()); // the same with precisionStep=1 assertLongRangeSplit(Long.MIN_VALUE, Long.MAX_VALUE, 1, false, Arrays.asList(new Long[]{ Long.valueOf(0x0L),Long.valueOf(0x1L) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(63) }).iterator()); // a inverse range should produce no sub-ranges - assertLongRangeSplit(9500L, -5000L, 4, false, Collections. emptyList().iterator()); + assertLongRangeSplit(9500L, -5000L, 4, false, Collections.emptyList().iterator(), Collections.emptyList().iterator()); // a 0-length range should reproduce the range itsself assertLongRangeSplit(9500L, 9500L, 4, false, Arrays.asList(new Long[]{ Long.valueOf(0x800000000000251cL),Long.valueOf(0x800000000000251cL) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) }).iterator()); } /** Note: The neededBounds iterator must be unsigned (easier understanding what's happening) */ - protected void assertIntRangeSplit(final int lower, final int upper, int precisionStep, - final boolean useBitSet, final Iterator neededBounds + private void assertIntRangeSplit(final int lower, final int upper, int precisionStep, + final boolean useBitSet, final Iterator neededBounds, final Iterator neededShifts ) throws Exception { final OpenBitSet bits=useBitSet ? new OpenBitSet(upper-lower+1) : null; @@ -273,11 +438,16 @@ assertTrue("min, max should be inside bounds", min>=lower && min<=upper && max>=lower && max<=upper); if (useBitSet) for (int i=min; i<=max; i++) { assertFalse("ranges should not overlap", bits.getAndSet(i-lower) ); + // extra exit condition to prevent overflow on MAX_VALUE + if (i == max) break; } + if (neededBounds == null) + return; // make unsigned ints for easier display and understanding min ^= 0x80000000; max ^= 0x80000000; - //System.out.println("Integer.valueOf(0x"+Integer.toHexString(min>>>shift)+"),Integer.valueOf(0x"+Integer.toHexString(max>>>shift)+"),"); + //System.out.println("Integer.valueOf(0x"+Integer.toHexString(min>>>shift)+"),Integer.valueOf(0x"+Integer.toHexString(max>>>shift)+")/*shift="+shift+"*/,"); + assertEquals( "shift", neededShifts.next().intValue(), shift); assertEquals( "inner min bound", neededBounds.next().intValue(), min>>>shift); assertEquals( "inner max bound", neededBounds.next().intValue(), max>>>shift); } @@ -300,11 +470,18 @@ Integer.valueOf(0x7fffed), Integer.valueOf(0x7fffef), Integer.valueOf(0x800020), Integer.valueOf(0x800024), Integer.valueOf(0x7ffff), Integer.valueOf(0x80001) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0), Integer.valueOf(0), + Integer.valueOf(4), Integer.valueOf(4), + Integer.valueOf(8), Integer.valueOf(8), + Integer.valueOf(12) }).iterator()); // the same with no range splitting assertIntRangeSplit(-5000, 9500, 32, true, Arrays.asList(new Integer[]{ Integer.valueOf(0x7fffec78),Integer.valueOf(0x8000251c) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) }).iterator()); // this tests optimized range splitting, if one of the inner bounds @@ -312,34 +489,46 @@ assertIntRangeSplit(0, 1024+63, 4, true, Arrays.asList(new Integer[]{ Integer.valueOf(0x8000040), Integer.valueOf(0x8000043), Integer.valueOf(0x800000), Integer.valueOf(0x800003) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(4), Integer.valueOf(8) }).iterator()); // the full int range should only consist of a lowest precision range; no bitset testing here, as too much memory needed :-) assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 8, false, Arrays.asList(new Integer[]{ Integer.valueOf(0x00),Integer.valueOf(0xff) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(24) }).iterator()); // the same with precisionStep=4 assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 4, false, Arrays.asList(new Integer[]{ Integer.valueOf(0x0),Integer.valueOf(0xf) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(28) }).iterator()); // the same with precisionStep=2 assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 2, false, Arrays.asList(new Integer[]{ Integer.valueOf(0x0),Integer.valueOf(0x3) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(30) }).iterator()); // the same with precisionStep=1 assertIntRangeSplit(Integer.MIN_VALUE, Integer.MAX_VALUE, 1, false, Arrays.asList(new Integer[]{ Integer.valueOf(0x0),Integer.valueOf(0x1) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(31) }).iterator()); // a inverse range should produce no sub-ranges - assertIntRangeSplit(9500, -5000, 4, false, Collections. emptyList().iterator()); + assertIntRangeSplit(9500, -5000, 4, false, Collections.emptyList().iterator(), Collections.emptyList().iterator()); // a 0-length range should reproduce the range itsself assertIntRangeSplit(9500, 9500, 4, false, Arrays.asList(new Integer[]{ Integer.valueOf(0x8000251c),Integer.valueOf(0x8000251c) + }).iterator(), Arrays.asList(new Integer[]{ + Integer.valueOf(0) }).iterator()); }