Uploaded image for project: 'Apache Avro'
  1. Apache Avro
  2. AVRO-3841

Align the specification of the way to encode NaN to the actual implementations

    XMLWordPrintableJSON

Details

    • Improvement
    • Status: Open
    • Minor
    • Resolution: Unresolved
    • 1.12.0
    • None
    • spec

    Description

      The specification describes the encoding of float and double as follows.

      a float is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java’s floatToIntBits and then encoded in little-endian format.
      a double is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java’s doubleToLongBits and then encoded in little-endian format.
      

      But the actual implementation in Java uses floatToRawIntBits/doubleToRawLongBits rather than floatToIntBits/doubleToLongBits.

      They differ in how they encode NaN.
      floatToIntBits/doubleToLongBits collapse every NaN to a single canonical bit pattern, so they don't distinguish between NaN and -NaN, whereas floatToRawIntBits/doubleToRawLongBits preserve the original bit pattern and therefore do.

      I confirmed that all the implementations distinguish between NaN and -NaN.
      So I think it's better to modify the specification.

      Java

        /**
         * Stores the raw bit pattern of {@code f} into {@code buf} as four bytes,
         * least-significant byte first, beginning at index {@code pos}.
         *
         * @param f   value to encode
         * @param buf destination array; indices pos..pos+3 are written
         * @param pos index that receives the least-significant byte
         * @return 4, the number of bytes written
         */
        public static int encodeFloat(float f, byte[] buf, int pos) {
          final int raw = Float.floatToRawIntBits(f);
          // Highest index first, so a too-short buffer fails before any byte is stored.
          for (int i = 3; i >= 0; i--) {
            buf[pos + i] = (byte) (raw >>> (i * 8));
          }
          return 4;
        }
      
        /**
         * Stores the raw bit pattern of {@code d} into {@code buf} as eight bytes,
         * least-significant byte first, beginning at index {@code pos}.
         *
         * @param d   value to encode
         * @param buf destination array; indices pos..pos+7 are written
         * @param pos index that receives the least-significant byte
         * @return 8, the number of bytes written
         */
        public static int encodeDouble(double d, byte[] buf, int pos) {
          final long bits = Double.doubleToRawLongBits(d);
          // 0xFFFFFFFF is an int literal (-1) that widens to -1L, so the mask keeps
          // every bit; the (int) cast is what actually truncates to 32 bits.
          int first = (int) (bits & 0xFFFFFFFF);
          int second = (int) ((bits >>> 32) & 0xFFFFFFFF);
          // the compiler seems to execute this order the best, likely due to
          // register allocation -- the lifetime of constants is minimized.
          // first -> bytes pos..pos+3 (low word), second -> bytes pos+4..pos+7 (high word).
          buf[pos] = (byte) (first);
          buf[pos + 4] = (byte) (second);
          buf[pos + 5] = (byte) (second >>> 8);
          buf[pos + 1] = (byte) (first >>> 8);
          buf[pos + 2] = (byte) (first >>> 16);
          buf[pos + 6] = (byte) (second >>> 16);
          buf[pos + 7] = (byte) (second >>> 24);
          buf[pos + 3] = (byte) (first >>> 24);
          return 8;
        }
      

      Rust

      Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()),
      Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()),
      

      Python

          def write_float(self, datum: float) -> None:
              """Pack ``datum`` with ``STRUCT_FLOAT`` and write the resulting bytes.

              NOTE(review): ``STRUCT_FLOAT`` is defined outside this excerpt; presumably
              a 4-byte little-endian ``struct.Struct`` -- confirm.
              """
              encoded = STRUCT_FLOAT.pack(datum)
              self.write(encoded)
      
          def write_double(self, datum: float) -> None:
              """Pack ``datum`` with ``STRUCT_DOUBLE`` and write the resulting bytes.

              NOTE(review): ``STRUCT_DOUBLE`` is defined outside this excerpt; presumably
              an 8-byte little-endian ``struct.Struct`` -- confirm.
              """
              encoded = STRUCT_DOUBLE.pack(datum)
              self.write(encoded)
      

      C

      /*
       * Emit the object representation of f as 4 bytes through AVRO_WRITE.
       * On big-endian builds the bytes are reordered least-significant first;
       * otherwise the in-memory bytes are passed through unchanged.
       * Returns 0 when execution reaches the end of the function.
       */
      static int write_float(avro_writer_t writer, const float f)
      {
              /* Union gives access to the float's bits as a 32-bit integer. */
              union {
                      float f;
                      int32_t i;
              } bits;
      #if AVRO_PLATFORM_IS_BIG_ENDIAN
              uint8_t le[4];
              int k;
      #endif

              bits.f = f;
      #if AVRO_PLATFORM_IS_BIG_ENDIAN
              /* le[k] receives bits 8k..8k+7, i.e. little-endian order. */
              for (k = 0; k < 4; k++) {
                      le[k] = (uint8_t) (bits.i >> (8 * k));
              }
              AVRO_WRITE(writer, le, 4);
      #else
              AVRO_WRITE(writer, (void *)&bits.i, 4);
      #endif
              return 0;
      }
      
      /*
       * Emit the object representation of d as 8 bytes through AVRO_WRITE.
       * On big-endian builds the bytes are reordered least-significant first;
       * otherwise the in-memory bytes are passed through unchanged.
       * Returns 0 when execution reaches the end of the function.
       */
      static int write_double(avro_writer_t writer, const double d)
      {
              /* Union gives access to the double's bits as a 64-bit integer. */
              union {
                      double d;
                      int64_t l;
              } bits;
      #if AVRO_PLATFORM_IS_BIG_ENDIAN
              uint8_t le[8];
              int k;
      #endif

              bits.d = d;
      #if AVRO_PLATFORM_IS_BIG_ENDIAN
              /* le[k] receives bits 8k..8k+7, i.e. little-endian order. */
              for (k = 0; k < 8; k++) {
                      le[k] = (uint8_t) (bits.l >> (8 * k));
              }
              AVRO_WRITE(writer, le, 8);
      #else
              AVRO_WRITE(writer, (void *)&bits.l, 8);
      #endif
              return 0;
      }
      

      C++

      // Writes the sizeof(float) bytes of f's in-memory representation to out_.
      // NOTE(review): no byte swap is performed here, so the output byte order
      // is whatever the host uses -- confirm this is only built for little-endian.
      void BinaryEncoder::encodeFloat(float f) {
          out_.writeBytes(reinterpret_cast<const uint8_t *>(&f), sizeof f);
      }
      
      // Writes the sizeof(double) bytes of d's in-memory representation to out_.
      // NOTE(review): no byte swap is performed here, so the output byte order
      // is whatever the host uses -- confirm this is only built for little-endian.
      void BinaryEncoder::encodeDouble(double d) {
          out_.writeBytes(reinterpret_cast<const uint8_t *>(&d), sizeof d);
      }
      

      C#

              /// <summary>
              /// Writes <paramref name="value"/> as the bytes returned by
              /// <c>BitConverter.GetBytes</c>, reversed first when the host is not
              /// little-endian so the least-significant byte is written first.
              /// </summary>
              /// <param name="value">The float to write.</param>
              public void WriteFloat(float value)
              {
                  byte[] raw = BitConverter.GetBytes(value);
                  if (!BitConverter.IsLittleEndian)
                  {
                      Array.Reverse(raw);
                  }
                  writeBytes(raw);
              }
      
              /// <summary>
              /// Writes the 64-bit pattern of <paramref name="value"/> one byte at a
              /// time, least-significant byte first.
              /// </summary>
              /// <param name="value">The double to write.</param>
              public void WriteDouble(double value)
              {
                  long raw = BitConverter.DoubleToInt64Bits(value);

                  // Shift counts 0, 8, ..., 56 select bytes from lowest to highest.
                  for (int shift = 0; shift < 64; shift += 8)
                  {
                      writeByte((byte)((raw >> shift) & 0xFF));
                  }
              }
      

      Ruby

            def read_float
              # Delegates to read_and_unpack with a length of 4 and the 'e'
              # unpack directive (single-precision float, little-endian).
              # NOTE(review): read_and_unpack is defined outside this excerpt;
              # presumably it reads that many bytes and unpacks them -- confirm.
              width = 4
              directive = 'e'
              read_and_unpack(width, directive)
            end
      
            def read_double
              # Delegates to read_and_unpack with a length of 8 and the 'E'
              # unpack directive (double-precision float, little-endian).
              # NOTE(review): read_and_unpack is defined outside this excerpt;
              # presumably it reads that many bytes and unpacks them -- confirm.
              width = 8
              directive = 'E'
              read_and_unpack(width, directive)
            end
      

      Perl

      # Packs $data with the "f<" template (native single-precision float, forced
      # little-endian) and hands a reference to the packed string to $cb.
      # $class and $schema are accepted but not used.
      sub encode_float {
          my ($class, $schema, $data, $cb) = @_;
          my $packed = pack("f<", $data);
          $cb->(\$packed);
      }
      
      # Packs $data with the "d<" template (native double-precision float, forced
      # little-endian) and hands a reference to the packed string to $cb.
      # $class and $schema are accepted but not used.
      sub encode_double {
          my ($class, $schema, $data, $cb) = @_;
          my $packed = pack("d<", $data);
          $cb->(\$packed);
      }
      

      PHP

          /**
           * Packs the argument, cast to float, with pack() format 'g'
           * (single-precision float, little-endian byte order).
           *
           * @param mixed $float value to encode
           * @return string the packed binary string
           */
          public static function floatToIntBits($float)
          {
              $value = (float) $float;
              return pack('g', $value);
          }
      
          /**
           * Packs the argument, cast with (double), with pack() format 'e'
           * (double-precision float, little-endian byte order).
           *
           * @param mixed $double value to encode
           * @return string the packed binary string
           */
          public static function doubleToLongBits($double)
          {
              $value = (double) $double;
              return pack('e', $value);
          }
      

      JavaScript

      /**
       * Advances this.pos by 4 and, if the new position still lies within
       * this.buf, writes f at the previous position with writeFloatLE.
       * When the buffer is too short nothing is written and undefined is
       * returned; this.pos is advanced either way.
       */
      Tap.prototype.writeFloat = function (f) {
        var start = this.pos;
        this.pos = start + 4;
        if (this.pos <= this.buf.length) {
          return this.buf.writeFloatLE(f, start);
        }
      };
      
      /**
       * Advances this.pos by 8 and, if the new position still lies within
       * this.buf, writes d at the previous position with writeDoubleLE.
       * When the buffer is too short nothing is written and undefined is
       * returned; this.pos is advanced either way.
       */
      Tap.prototype.writeDouble = function (d) {
        var start = this.pos;
        this.pos = start + 8;
        if (this.pos <= this.buf.length) {
          return this.buf.writeDoubleLE(d, start);
        }
      };
      

      Attachments

        Issue Links

          Activity

            People

              Unassigned Unassigned
              sarutak Kousuke Saruta
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

              Dates

                Created:
                Updated:

                Time Tracking

                  Estimated:
                  Original Estimate - Not Specified
                  Not Specified
                  Remaining:
                  Remaining Estimate - 0h
                  0h
                  Logged:
                  Time Spent - 20m
                  20m