Uploaded image for project: 'Lucene - Core'
  1. Lucene - Core
  2. LUCENE-530

Extend NumberTools to support int/long/float/double to string

    XMLWordPrintableJSON

Details

    • Improvement
    • Status: Closed
    • Minor
    • Resolution: Won't Fix
    • 1.9
    • None
    • modules/analysis
    • None

    Description

      Extend NumberTools to support encoding int/long/float/double values as strings.

      So you can search using range queries on int/long/float/double, if you want.

      Here is the basis for how NumberTools could be extended to support int/long/double/float.
      As I only write these values to the index and fix tokenisation in searches, I was not so fussed about the reverse transformations from the encoded Strings back to numbers.

      /**
       * Encodes numeric primitives as fixed-width, lower-case hexadecimal strings
       * whose {@link String#compareTo(String)} order matches the numeric order of
       * the encoded values.
       *
       * <p>{@code int} and {@code float} values encode to 8 hex digits;
       * {@code long} and {@code double} values encode to 16 hex digits. Only the
       * forward (number to string) direction is provided.
       *
       * <p>This class holds no state and cannot be instantiated.
       */
      public final class NumericEncoder
      {
          /*
           * Constants for integer encoding
           */

          static final int INTEGER_SIGN_MASK = 0x80000000;

          /*
           * Constants for long encoding
           */

          static final long LONG_SIGN_MASK = 0x8000000000000000L;

          /*
           * Constants for float encoding
           */

          static final int FLOAT_SIGN_MASK = 0x80000000;

          static final int FLOAT_EXPONENT_MASK = 0x7F800000;

          static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

          /*
           * Constants for double encoding
           */

          static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

          static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

          static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

          /** Hex digits indexed by nibble value. */
          private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

          /** Selects the low four bits (one hex digit). */
          private static final int MASK = (1 << 4) - 1;

          /** Not instantiable: static utility methods only. */
          private NumericEncoder()
          {
          }

          /**
           * Encode an integer into a string that orders correctly using string
           * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
           * ffffffff.
           *
           * @param intToEncode the value to encode
           * @return the 8-character lower-case hex encoding
           */
          public static String encode(int intToEncode)
          {
              // Flipping the sign bit maps the signed range onto the unsigned range.
              return encodeToHex(intToEncode ^ INTEGER_SIGN_MASK);
          }

          /**
           * Encode a long into a string that orders correctly using string
           * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
           * ffffffffffffffff.
           *
           * @param longToEncode the value to encode
           * @return the 16-character lower-case hex encoding
           */
          public static String encode(long longToEncode)
          {
              // Flipping the sign bit maps the signed range onto the unsigned range.
              return encodeToHex(longToEncode ^ LONG_SIGN_MASK);
          }

          /**
           * Encode a float into a string that orders correctly according to string
           * comparison.
           *
           * <p>The bits come from {@link Float#floatToIntBits(float)}. Negative
           * values have every bit inverted; non-negative values have only the sign
           * bit set. Consequently -0f ("7fffffff") sorts immediately below 0f
           * ("80000000"), and NaN ("ffc00000") sorts above positive infinity
           * ("ff800000").
           *
           * @param floatToEncode the value to encode
           * @return the 8-character lower-case hex encoding
           */
          public static String encode(float floatToEncode)
          {
              int bits = Float.floatToIntBits(floatToEncode);
              // Negative: invert sign, exponent and mantissa so that larger magnitudes
              // sort lower. Non-negative: only move the value above all negatives.
              int flip = (bits & FLOAT_SIGN_MASK) != 0
                      ? (FLOAT_SIGN_MASK | FLOAT_EXPONENT_MASK | FLOAT_MANTISSA_MASK)
                      : FLOAT_SIGN_MASK;
              return encodeToHex(bits ^ flip);
          }

          /**
           * Encode a double into a string that orders correctly according to string
           * comparison.
           *
           * <p>The bits come from {@link Double#doubleToLongBits(double)}. Negative
           * values have every bit inverted; non-negative values have only the sign
           * bit set. Consequently -0d ("7fffffffffffffff") sorts immediately below
           * 0d ("8000000000000000"), and NaN ("fff8000000000000") sorts above
           * positive infinity ("fff0000000000000").
           *
           * @param doubleToEncode the value to encode
           * @return the 16-character lower-case hex encoding
           */
          public static String encode(double doubleToEncode)
          {
              long bits = Double.doubleToLongBits(doubleToEncode);
              // Same scheme as the float overload, on 64 bits.
              long flip = (bits & DOUBLE_SIGN_MASK) != 0
                      ? (DOUBLE_SIGN_MASK | DOUBLE_EXPONENT_MASK | DOUBLE_MANTISSA_MASK)
                      : DOUBLE_SIGN_MASK;
              return encodeToHex(bits ^ flip);
          }

          /**
           * Render the 32 bits of {@code i} as exactly 8 lower-case hex digits,
           * most significant digit first, zero padded.
           */
          private static String encodeToHex(int i)
          {
              char[] buf = new char[8];
              // Fill from the least significant digit backwards.
              for (int pos = buf.length - 1; pos >= 0; pos--)
              {
                  buf[pos] = DIGITS[i & MASK];
                  i >>>= 4;
              }
              return new String(buf);
          }

          /**
           * Render the 64 bits of {@code l} as exactly 16 lower-case hex digits,
           * most significant digit first, zero padded.
           */
          private static String encodeToHex(long l)
          {
              char[] buf = new char[16];
              // Fill from the least significant digit backwards.
              for (int pos = buf.length - 1; pos >= 0; pos--)
              {
                  buf[pos] = DIGITS[(int) (l & MASK)];
                  l >>>= 4;
              }
              return new String(buf);
          }
      }
























      public class NumericEncodingTest extends TestCase
      {

      public NumericEncodingTest()
      { super(); }

      public NumericEncodingTest(String arg0)

      { super(arg0); }

      /**

      • Do an exhaustive test for integers
      • */
        public void xtestAllIntegerEncodings()
        {
        String lastString = null;
        String nextString = null;
        for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)

        Unknown macro: { nextString = NumericEncoder.encode((int) i); if (lastString != null) { assertFalse(lastString.compareTo(nextString) > 0); } lastString = nextString; }

        }

      /**

      • Do an exhaustive test for float
      • */
        public void xtestAllFloatEncodings()
        {
        Float last = null;
        Float next = null;
        String lastString = null;
        String nextString = null;

      for (int sign = 1; sign >= 0; sign--)
      {
      if (sign == 0)
      {
      for (int exponent = 0; exponent <= 0xFF; exponent++)
      {
      for (int mantissa = 0; mantissa <= 0x007FFFFF; mantissa++)
      {
      int bitPattern = sign << 31 | exponent << 23 | mantissa;
      next = Float.intBitsToFloat(bitPattern);

      if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))

      { System.err.println(last + " > " + next); }
      if (!next.equals(Float.NaN))
      {
      nextString = NumericEncoder.encode(next);
      if ((lastString != null) && (lastString.compareTo(nextString) > 0))
      { System.err.println(lastString + " > " + nextString); }
      lastString = nextString;
      }
      last = next;

      }
      }
      }
      else
      {
      for (int exponent = 0xFF; exponent >= 0; exponent--)
      {
      for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--)
      {
      int bitPattern = sign << 31 | exponent << 23 | mantissa;
      next = Float.intBitsToFloat(bitPattern);
      if (!next.equals(Float.NaN) && (last != null) && (last.compareTo(next) > 0))
      { System.err.println(last + " > " + next); }

      if (!next.equals(Float.NaN))
      {
      nextString = NumericEncoder.encode(next);
      if ((lastString != null) && (lastString.compareTo(nextString) > 0))

      { System.err.println(lastString + " > " + nextString); }

      lastString = nextString;
      }
      last = next;
      }
      }
      }
      }
      }

      /*

      • Sample test for int
        */

      public void testIntegerEncoding()

      { assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE)); assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1)); assertEquals("7fffffff", NumericEncoder.encode(-1)); assertEquals("80000000", NumericEncoder.encode(0)); assertEquals("80000001", NumericEncoder.encode(1)); assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1)); assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE)); }

      /*

      • Sample test for long
        */

      public void testLongEncoding()

      { assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE)); assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1)); assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L)); assertEquals("8000000000000000", NumericEncoder.encode(0L)); assertEquals("8000000000000001", NumericEncoder.encode(1L)); assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1)); assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE)); }

      /*

      • Sample test for float
        */

      public void testFloatEncoding()

      { assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY)); assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE)); assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE)); assertEquals("7fffffff", NumericEncoder.encode(-0f)); assertEquals("80000000", NumericEncoder.encode(0f)); assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE)); assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE)); assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY)); assertEquals("ffc00000", NumericEncoder.encode(Float.NaN)); }

      /*

      • Sample test for double
        */

      public void testDoubleEncoding()

      { assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY)); assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE)); assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE)); assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d)); assertEquals("8000000000000000", NumericEncoder.encode(0d)); assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE)); assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE)); assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY)); assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN)); }

      }

      Attachments

        Activity

          People

            Unassigned Unassigned
            andyhind Andy Hind
            Votes:
            2 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved: