Index: lucene/CHANGES.txt =================================================================== --- lucene/CHANGES.txt (revision 1203770) +++ lucene/CHANGES.txt (working copy) @@ -729,6 +729,13 @@ * LUCENE-3548: Fix CharsRef#append to extend length of the existing char[] and preserve existing chars. (Simon Willnauer) +* LUCENE-3582: Normalize NaN values in NumericUtils.floatToSortableInt() / + NumericUtils.doubleToSortableLong(), so this is consistent with stored + fields. Also fix NumericRangeQuery to not falsely hit NaNs on half-open + ranges (one bound is null). Because of normalization, NumericRangeQuery + can now be used to hit NaN values by creating a query with + upper == lower == NaN (inclusive). (Dawid Weiss, Uwe Schindler) + API Changes * LUCENE-3454: Rename IndexWriter.optimize to forceMerge to discourage Index: lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java =================================================================== --- lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java (revision 1203770) +++ lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java (working copy) @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.NumericTokenStream; // for javadocs import org.apache.lucene.document.NumericField; // for javadocs +import org.apache.lucene.document.NumericField.DataType; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; @@ -154,15 +155,14 @@ **/ public final class NumericRangeQuery extends MultiTermQuery { - private NumericRangeQuery(final String field, final int precisionStep, final int valSize, + private NumericRangeQuery(final String field, final int precisionStep, final DataType dataType, T min, T max, final boolean minInclusive, final boolean maxInclusive ) { super(field); - assert (valSize == 32 || valSize == 64); if (precisionStep < 1) throw new IllegalArgumentException("precisionStep must be >=1"); this.precisionStep = precisionStep; - this.valSize = valSize; + this.dataType = dataType; this.min = min; this.max = max; this.minInclusive = minInclusive; @@ -172,14 +172,16 @@ // hits too many terms, so set to CONSTANT_SCORE_FILTER right off // (especially as the FilteredTermsEnum is costly if wasted only for AUTO tests because it // creates new enums from IndexReader for each sub-range) - switch (valSize) { - case 64: + switch (dataType) { + case LONG: + case DOUBLE: setRewriteMethod( (precisionStep > 6) ? CONSTANT_SCORE_FILTER_REWRITE : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT ); break; - case 32: + case INT: + case FLOAT: setRewriteMethod( (precisionStep > 8) ? CONSTANT_SCORE_FILTER_REWRITE : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT @@ -187,7 +189,7 @@ break; default: // should never happen - throw new IllegalArgumentException("valSize must be 32 or 64"); + throw new IllegalArgumentException("Invalid numeric DataType"); } // shortcut if upper bound == lower bound @@ -206,7 +208,7 @@ public static NumericRangeQuery newLongRange(final String field, final int precisionStep, Long min, Long max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, precisionStep, DataType.LONG, min, max, minInclusive, maxInclusive); } /** @@ -219,7 +221,7 @@ public static NumericRangeQuery newLongRange(final String field, Long min, Long max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, DataType.LONG, min, max, minInclusive, maxInclusive); } /** @@ -232,7 +234,7 @@ public static NumericRangeQuery newIntRange(final String field, final int precisionStep, Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, precisionStep, DataType.INT, min, max, minInclusive, maxInclusive); } /** @@ -245,7 +247,7 @@ public static NumericRangeQuery newIntRange(final String field, Integer min, Integer max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, DataType.INT, min, max, minInclusive, maxInclusive); } /** @@ -258,7 +260,7 @@ public static NumericRangeQuery newDoubleRange(final String field, final int precisionStep, Double min, Double max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, precisionStep, 64, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, precisionStep, DataType.DOUBLE, min, max, minInclusive, maxInclusive); } /** @@ -271,7 +273,7 @@ public static NumericRangeQuery newDoubleRange(final String field, Double min, Double max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 64, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, DataType.DOUBLE, min, max, minInclusive, maxInclusive); } /** @@ -284,7 +286,7 @@ public static NumericRangeQuery newFloatRange(final String field, final int precisionStep, Float min, Float max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, precisionStep, 32, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, precisionStep, DataType.FLOAT, min, max, minInclusive, maxInclusive); } /** @@ -297,7 +299,7 @@ public static NumericRangeQuery newFloatRange(final String field, Float min, Float max, final boolean minInclusive, final boolean maxInclusive ) { - return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, 32, min, max, minInclusive, maxInclusive); + return new NumericRangeQuery(field, NumericUtils.PRECISION_STEP_DEFAULT, DataType.FLOAT, min, max, minInclusive, maxInclusive); } @Override @SuppressWarnings("unchecked") @@ -366,10 +368,21 @@ } // members (package private, to be also fast accessible by NumericRangeTermEnum) - final int precisionStep, valSize; + final int precisionStep; + final DataType dataType; final T min, max; final boolean minInclusive,maxInclusive; + // used to handle float/double infinity correcty + static final long LONG_NEGATIVE_INFINITY = + NumericUtils.doubleToSortableLong(Double.NEGATIVE_INFINITY); + static final long LONG_POSITIVE_INFINITY = + NumericUtils.doubleToSortableLong(Double.POSITIVE_INFINITY); + static final int INT_NEGATIVE_INFINITY = + NumericUtils.floatToSortableInt(Float.NEGATIVE_INFINITY); + static final int INT_POSITIVE_INFINITY = + NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); + /** * Subclass of FilteredTermsEnum for enumerating all terms that match the * sub-ranges for trie range queries, using flex API. @@ -386,17 +399,20 @@ private final LinkedList rangeBounds = new LinkedList(); private final Comparator termComp; - + NumericRangeTermsEnum(final TermsEnum tenum) throws IOException { super(tenum); - switch (valSize) { - case 64: { + switch (dataType) { + case LONG: + case DOUBLE: { // lower - long minBound = Long.MIN_VALUE; - if (min instanceof Long) { - minBound = min.longValue(); - } else if (min instanceof Double) { - minBound = NumericUtils.doubleToSortableLong(min.doubleValue()); + long minBound; + if (dataType == DataType.LONG) { + minBound = (min == null) ? Long.MIN_VALUE : min.longValue(); + } else { + assert dataType == DataType.DOUBLE; + minBound = (min == null) ? LONG_NEGATIVE_INFINITY + : NumericUtils.doubleToSortableLong(min.doubleValue()); } if (!minInclusive && min != null) { if (minBound == Long.MAX_VALUE) break; @@ -404,11 +420,13 @@ } // upper - long maxBound = Long.MAX_VALUE; - if (max instanceof Long) { - maxBound = max.longValue(); - } else if (max instanceof Double) { - maxBound = NumericUtils.doubleToSortableLong(max.doubleValue()); + long maxBound; + if (dataType == DataType.LONG) { + maxBound = (max == null) ? Long.MAX_VALUE : max.longValue(); + } else { + assert dataType == DataType.DOUBLE; + maxBound = (max == null) ? LONG_POSITIVE_INFINITY + : NumericUtils.doubleToSortableLong(max.doubleValue()); } if (!maxInclusive && max != null) { if (maxBound == Long.MIN_VALUE) break; @@ -425,13 +443,16 @@ break; } - case 32: { + case INT: + case FLOAT: { // lower - int minBound = Integer.MIN_VALUE; - if (min instanceof Integer) { - minBound = min.intValue(); - } else if (min instanceof Float) { - minBound = NumericUtils.floatToSortableInt(min.floatValue()); + int minBound; + if (dataType == DataType.INT) { + minBound = (min == null) ? Integer.MIN_VALUE : min.intValue(); + } else { + assert dataType == DataType.FLOAT; + minBound = (min == null) ? INT_NEGATIVE_INFINITY + : NumericUtils.floatToSortableInt(min.floatValue()); } if (!minInclusive && min != null) { if (minBound == Integer.MAX_VALUE) break; @@ -439,11 +460,13 @@ } // upper - int maxBound = Integer.MAX_VALUE; - if (max instanceof Integer) { - maxBound = max.intValue(); - } else if (max instanceof Float) { - maxBound = NumericUtils.floatToSortableInt(max.floatValue()); + int maxBound; + if (dataType == DataType.INT) { + maxBound = (max == null) ? Integer.MAX_VALUE : max.intValue(); + } else { + assert dataType == DataType.FLOAT; + maxBound = (max == null) ? INT_POSITIVE_INFINITY + : NumericUtils.floatToSortableInt(max.floatValue()); } if (!maxInclusive && max != null) { if (maxBound == Integer.MIN_VALUE) break; @@ -462,7 +485,7 @@ default: // should never happen - throw new IllegalArgumentException("valSize must be 32 or 64"); + throw new IllegalArgumentException("Invalid numeric DataType"); } termComp = getComparator(); Index: lucene/src/java/org/apache/lucene/util/NumericUtils.java =================================================================== --- lucene/src/java/org/apache/lucene/util/NumericUtils.java (revision 1203770) +++ lucene/src/java/org/apache/lucene/util/NumericUtils.java (working copy) @@ -242,7 +242,7 @@ * @see #sortableLongToDouble */ public static long doubleToSortableLong(double val) { - long f = Double.doubleToRawLongBits(val); + long f = Double.doubleToLongBits(val); if (f<0) f ^= 0x7fffffffffffffffL; return f; } @@ -264,7 +264,7 @@ * @see #sortableIntToFloat */ public static int floatToSortableInt(float val) { - int f = Float.floatToRawIntBits(val); + int f = Float.floatToIntBits(val); if (f<0) f ^= 0x7fffffff; return f; } Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (revision 1203770) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java (working copy) @@ -22,7 +22,6 @@ import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; @@ -32,6 +31,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.TestNumericUtils; // NaN arrays import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -271,7 +271,8 @@ @Test public void testInfiniteValues() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(new NumericField("float").setFloatValue(Float.NEGATIVE_INFINITY)); doc.add(new NumericField("int").setIntValue(Integer.MIN_VALUE)); @@ -286,6 +287,13 @@ doc.add(new NumericField("float").setFloatValue(0.0f)); doc.add(new NumericField("int").setIntValue(0)); writer.addDocument(doc); + + for (float f : TestNumericUtils.FLOAT_NANs) { + doc = new Document(); + doc.add(new NumericField("float").setFloatValue(f)); + writer.addDocument(doc); + } + writer.close(); IndexReader r = IndexReader.open(dir); @@ -315,6 +323,18 @@ topDocs = s.search(q, 10); assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + q=NumericRangeQuery.newFloatRange("float", Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newFloatRange("float", Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 1, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newFloatRange("float", Float.NaN, Float.NaN, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", TestNumericUtils.FLOAT_NANs.length, topDocs.scoreDocs.length ); + s.close(); r.close(); dir.close(); Index: lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java =================================================================== --- lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (revision 1203770) +++ lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java (working copy) @@ -21,7 +21,6 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowMultiReaderWrapper; @@ -32,6 +31,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.TestNumericUtils; // NaN arrays import org.apache.lucene.util._TestUtil; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -290,8 +290,8 @@ @Test public void testInfiniteValues() throws Exception { Directory dir = newDirectory(); - IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random))); + RandomIndexWriter writer = new RandomIndexWriter(random, dir, + newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); doc.add(new NumericField("double").setDoubleValue(Double.NEGATIVE_INFINITY)); doc.add(new NumericField("long").setLongValue(Long.MIN_VALUE)); @@ -306,6 +306,13 @@ doc.add(new NumericField("double").setDoubleValue(0.0)); doc.add(new NumericField("long").setLongValue(0L)); writer.addDocument(doc); + + for (double d : TestNumericUtils.DOUBLE_NANs) { + doc = new Document(); + doc.add(new NumericField("double").setDoubleValue(d)); + writer.addDocument(doc); + } + writer.close(); IndexReader r = IndexReader.open(dir); @@ -335,6 +342,18 @@ topDocs = s.search(q, 10); assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + q=NumericRangeQuery.newDoubleRange("double", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 3, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newDoubleRange("double", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, false, false); + topDocs = s.search(q, 10); + assertEquals("Score doc count", 1, topDocs.scoreDocs.length ); + + q=NumericRangeQuery.newDoubleRange("double", Double.NaN, Double.NaN, true, true); + topDocs = s.search(q, 10); + assertEquals("Score doc count", TestNumericUtils.DOUBLE_NANs.length, topDocs.scoreDocs.length ); + s.close(); r.close(); dir.close(); Index: lucene/src/test/org/apache/lucene/util/TestNumericUtils.java =================================================================== --- lucene/src/test/org/apache/lucene/util/TestNumericUtils.java (revision 1203770) +++ lucene/src/test/org/apache/lucene/util/TestNumericUtils.java (working copy) @@ -143,7 +143,7 @@ public void testDoubles() throws Exception { double[] vals=new double[]{ Double.NEGATIVE_INFINITY, -2.3E25, -1.0E15, -1.0, -1.0E-1, -1.0E-2, -0.0, - +0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY + +0.0, 1.0E-2, 1.0E-1, 1.0, 1.0E15, 2.3E25, Double.POSITIVE_INFINITY, Double.NaN }; long[] longVals=new long[vals.length]; @@ -159,10 +159,28 @@ } } + public static final double[] DOUBLE_NANs = { + Double.NaN, + Double.longBitsToDouble(0x7ff0000000000001L), + Double.longBitsToDouble(0x7fffffffffffffffL), + Double.longBitsToDouble(0xfff0000000000001L), + Double.longBitsToDouble(0xffffffffffffffffL) + }; + + public void testSortableDoubleNaN() { + final long plusInf = NumericUtils.doubleToSortableLong(Double.POSITIVE_INFINITY); + for (double nan : DOUBLE_NANs) { + assertTrue(Double.isNaN(nan)); + final long sortable = NumericUtils.doubleToSortableLong(nan); + assertTrue("Double not sorted correctly: " + nan + ", long repr: " + + sortable + ", positive inf.: " + plusInf, sortable > plusInf); + } + } + public void testFloats() throws Exception { float[] vals=new float[]{ Float.NEGATIVE_INFINITY, -2.3E25f, -1.0E15f, -1.0f, -1.0E-1f, -1.0E-2f, -0.0f, - +0.0f, 1.0E-2f, 1.0E-1f, 1.0f, 1.0E15f, 2.3E25f, Float.POSITIVE_INFINITY + +0.0f, 1.0E-2f, 1.0E-1f, 1.0f, 1.0E15f, 2.3E25f, Float.POSITIVE_INFINITY, Float.NaN }; int[] intVals=new int[vals.length]; @@ -177,7 +195,25 @@ assertTrue( "check sort order", intVals[i-1] < intVals[i] ); } } - + + public static final float[] FLOAT_NANs = { + Float.NaN, + Float.intBitsToFloat(0x7f800001), + Float.intBitsToFloat(0x7fffffff), + Float.intBitsToFloat(0xff800001), + Float.intBitsToFloat(0xffffffff) + }; + + public void testSortableFloatNaN() { + final int plusInf = NumericUtils.floatToSortableInt(Float.POSITIVE_INFINITY); + for (float nan : FLOAT_NANs) { + assertTrue(Float.isNaN(nan)); + final int sortable = NumericUtils.floatToSortableInt(nan); + assertTrue("Float not sorted correctly: " + nan + ", int repr: " + + sortable + ", positive inf.: " + plusInf, sortable > plusInf); + } + } + // INFO: Tests for trieCodeLong()/trieCodeInt() not needed because implicitely tested by range filter tests /** Note: The neededBounds Iterable must be unsigned (easier understanding what's happening) */