Details
-
Improvement
-
Status: Open
-
Minor
-
Resolution: Unresolved
-
1.12.0
-
None
Description
The specification describes the encoding of float and double values as follows:
a float is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java’s floatToIntBits and then encoded in little-endian format. a double is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java’s doubleToLongBits and then encoded in little-endian format.
But the actual implementation in Java uses floatToRawIntBits/doubleToRawLongBits rather than floatToIntBits/doubleToLongBits.
The two families of methods differ in how they encode NaN.
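floatToIntBits/doubleToLongBits collapse every NaN to a single canonical bit pattern, so they do not distinguish between NaN and -NaN, whereas floatToRawIntBits/doubleToRawLongBits preserve the original bits and therefore do.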
I confirmed that all of the implementations distinguish between NaN and -NaN.
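So I think it would be better to modify the specification to refer to floatToRawIntBits/doubleToRawLongBits, matching what the implementations actually do.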
Java
/**
 * Writes {@code f} into {@code buf} as 4 bytes starting at {@code pos},
 * least-significant byte first.
 *
 * The bit pattern is taken with {@code Float.floatToRawIntBits}, not
 * {@code Float.floatToIntBits}.
 *
 * @param f   value to encode
 * @param buf destination array; indices pos..pos+3 are written
 * @param pos index of the first byte to write
 * @return the number of bytes written, always 4
 */
public static int encodeFloat(float f, byte[] buf, int pos) {
  final int bits = Float.floatToRawIntBits(f);
  buf[pos + 3] = (byte) (bits >>> 24);
  buf[pos + 2] = (byte) (bits >>> 16);
  buf[pos + 1] = (byte) (bits >>> 8);
  buf[pos] = (byte) (bits);
  return 4;
}

/**
 * Writes {@code d} into {@code buf} as 8 bytes starting at {@code pos},
 * least-significant byte first.
 *
 * The bit pattern is taken with {@code Double.doubleToRawLongBits}, not
 * {@code Double.doubleToLongBits}.
 *
 * @param d   value to encode
 * @param buf destination array; indices pos..pos+7 are written
 * @param pos index of the first byte to write
 * @return the number of bytes written, always 8
 */
public static int encodeDouble(double d, byte[] buf, int pos) {
  final long bits = Double.doubleToRawLongBits(d);
  // 0xFFFFFFFF is an int literal (-1) that widens to an all-ones long, so the
  // mask itself keeps every bit; the (int) cast is what selects 32 bits.
  int first = (int) (bits & 0xFFFFFFFF);
  int second = (int) ((bits >>> 32) & 0xFFFFFFFF);
  // the compiler seems to execute this order the best, likely due to
  // register allocation -- the lifetime of constants is minimized.
  buf[pos] = (byte) (first);
  buf[pos + 4] = (byte) (second);
  buf[pos + 5] = (byte) (second >>> 8);
  buf[pos + 1] = (byte) (first >>> 8);
  buf[pos + 2] = (byte) (first >>> 16);
  buf[pos + 6] = (byte) (second >>> 16);
  buf[pos + 7] = (byte) (second >>> 24);
  buf[pos + 3] = (byte) (first >>> 24);
  return 8;
}
Rust
// Two arms excerpted from a larger `match`; the enclosing function is not shown.
// Each arm appends the result of `x.to_le_bytes()` to `buffer`.
// NOTE(review): presumably `x` is f32 / f64 here, in which case `to_le_bytes`
// yields the value's bit pattern in little-endian order -- confirm against
// the `Value` definition.
Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()),
Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()),
Python
def write_float(self, datum: float) -> None:
    """
    A float is written as 4 bytes.
    The float is converted into a 32-bit integer using a method equivalent to
    Java's floatToIntBits and then encoded in little-endian format.
    """
    # STRUCT_FLOAT is defined outside this excerpt; its pack() output is
    # passed to self.write unchanged.
    # NOTE(review): presumably a little-endian 4-byte struct format -- confirm.
    self.write(STRUCT_FLOAT.pack(datum))


def write_double(self, datum: float) -> None:
    """
    A double is written as 8 bytes.
    The double is converted into a 64-bit integer using a method equivalent to
    Java's doubleToLongBits and then encoded in little-endian format.
    """
    # STRUCT_DOUBLE is defined outside this excerpt; its pack() output is
    # passed to self.write unchanged.
    # NOTE(review): presumably a little-endian 8-byte struct format -- confirm.
    self.write(STRUCT_DOUBLE.pack(datum))
C
/*
 * Writes a float to `writer` as 4 bytes.
 *
 * The value's bits are reinterpreted as an int32_t through a union. On
 * big-endian builds the bytes are emitted least-significant first from a
 * temporary buffer; otherwise the integer's in-memory bytes are written
 * directly. Returns 0 at the end of the function.
 * NOTE(review): AVRO_WRITE is a macro defined outside this excerpt and may
 * return early on failure -- confirm.
 */
static int write_float(avro_writer_t writer, const float f)
{
#if AVRO_PLATFORM_IS_BIG_ENDIAN
	uint8_t buf[4];
#endif
	/* Union used to view the float's bit pattern as an integer. */
	union {
		float f;
		int32_t i;
	} v;

	v.f = f;
#if AVRO_PLATFORM_IS_BIG_ENDIAN
	/* buf[0] receives the low-order byte, buf[3] the high-order byte. */
	buf[0] = (uint8_t) (v.i >> 0);
	buf[1] = (uint8_t) (v.i >> 8);
	buf[2] = (uint8_t) (v.i >> 16);
	buf[3] = (uint8_t) (v.i >> 24);
	AVRO_WRITE(writer, buf, 4);
#else
	/* No reordering on this path: the stored bytes are written as they are. */
	AVRO_WRITE(writer, (void *)&v.i, 4);
#endif
	return 0;
}

/*
 * Writes a double to `writer` as 8 bytes.
 *
 * Same scheme as write_float, using an int64_t view of the value and an
 * 8-byte buffer on big-endian builds. Returns 0 at the end of the function.
 * NOTE(review): AVRO_WRITE is a macro defined outside this excerpt and may
 * return early on failure -- confirm.
 */
static int write_double(avro_writer_t writer, const double d)
{
#if AVRO_PLATFORM_IS_BIG_ENDIAN
	uint8_t buf[8];
#endif
	/* Union used to view the double's bit pattern as an integer. */
	union {
		double d;
		int64_t l;
	} v;

	v.d = d;
#if AVRO_PLATFORM_IS_BIG_ENDIAN
	/* buf[0] receives the low-order byte, buf[7] the high-order byte. */
	buf[0] = (uint8_t) (v.l >> 0);
	buf[1] = (uint8_t) (v.l >> 8);
	buf[2] = (uint8_t) (v.l >> 16);
	buf[3] = (uint8_t) (v.l >> 24);
	buf[4] = (uint8_t) (v.l >> 32);
	buf[5] = (uint8_t) (v.l >> 40);
	buf[6] = (uint8_t) (v.l >> 48);
	buf[7] = (uint8_t) (v.l >> 56);
	AVRO_WRITE(writer, buf, 8);
#else
	/* No reordering on this path: the stored bytes are written as they are. */
	AVRO_WRITE(writer, (void *)&v.l, 8);
#endif
	return 0;
}
C++
/// Passes the sizeof(float) bytes of `f`, exactly as stored in memory, to
/// out_.writeBytes.
/// NOTE(review): no byte swapping happens in this function, so the byte order
/// handed to out_ is the host's; it is little-endian only on little-endian
/// hosts unless writeBytes reorders -- confirm.
void BinaryEncoder::encodeFloat(float f) {
    const auto *p = reinterpret_cast<const uint8_t *>(&f);
    out_.writeBytes(p, sizeof(float));
}

/// Passes the sizeof(double) bytes of `d`, exactly as stored in memory, to
/// out_.writeBytes.
/// NOTE(review): no byte swapping happens in this function, so the byte order
/// handed to out_ is the host's; it is little-endian only on little-endian
/// hosts unless writeBytes reorders -- confirm.
void BinaryEncoder::encodeDouble(double d) {
    const auto *p = reinterpret_cast<const uint8_t *>(&d);
    out_.writeBytes(p, sizeof(double));
}
C#
/// <summary>
/// Writes a float by obtaining its bytes from BitConverter.GetBytes,
/// reversing them when BitConverter reports a non-little-endian platform,
/// and passing the array to writeBytes.
/// </summary>
/// <param name="value">Value to write.</param>
public void WriteFloat(float value)
{
    byte[] buffer = BitConverter.GetBytes(value);
    // Reverse only when the platform is not little-endian.
    if (!BitConverter.IsLittleEndian) Array.Reverse(buffer);
    writeBytes(buffer);
}

/// <summary>
/// Writes a double as 8 single bytes, least-significant byte first, taken
/// from the 64-bit pattern returned by BitConverter.DoubleToInt64Bits.
/// </summary>
/// <param name="value">Value to write.</param>
public void WriteDouble(double value)
{
    long bits = BitConverter.DoubleToInt64Bits(value);

    // Each call masks out one byte, starting from the low-order end.
    writeByte((byte)(bits & 0xFF));
    writeByte((byte)((bits >> 8) & 0xFF));
    writeByte((byte)((bits >> 16) & 0xFF));
    writeByte((byte)((bits >> 24) & 0xFF));
    writeByte((byte)((bits >> 32) & 0xFF));
    writeByte((byte)((bits >> 40) & 0xFF));
    writeByte((byte)((bits >> 48) & 0xFF));
    writeByte((byte)((bits >> 56) & 0xFF));
}
Ruby
# Reads one float by calling read_and_unpack with a length of 4 and the
# unpack directive 'e'.
# NOTE(review): read_and_unpack is defined outside this excerpt; presumably it
# reads that many bytes and unpacks them with the given directive -- confirm.
def read_float
  # A float is written as 4 bytes.
  # The float is converted into a 32-bit integer using a method
  # equivalent to Java's floatToRawIntBits and then encoded in
  # little-endian format.
  read_and_unpack(4, 'e')
end

# Reads one double by calling read_and_unpack with a length of 8 and the
# unpack directive 'E'.
# NOTE(review): read_and_unpack is defined outside this excerpt; presumably it
# reads that many bytes and unpacks them with the given directive -- confirm.
def read_double
  # A double is written as 8 bytes.
  # The double is converted into a 64-bit integer using a method
  # equivalent to Java's doubleToRawLongBits and then encoded in
  # little-endian format.
  read_and_unpack(8, 'E')
end
Perl
# Packs $data with the template "f<" and hands a reference to the packed
# string to the callback $cb. $schema is accepted but not used here.
sub encode_float {
    my $class = shift;
    my ($schema, $data, $cb) = @_;
    # "<" is pack's little-endian modifier, applied to the "f" format.
    my $enc = pack "f<", $data;
    $cb->(\$enc);
}

# Packs $data with the template "d<" and hands a reference to the packed
# string to the callback $cb. $schema is accepted but not used here.
sub encode_double {
    my $class = shift;
    my ($schema, $data, $cb) = @_;
    # "<" is pack's little-endian modifier, applied to the "d" format.
    my $enc = pack "d<", $data;
    $cb->(\$enc);
}
PHP
/**
 * Casts $float to float and returns the result of pack() with format 'g'
 * (little-endian float in PHP's pack format table).
 *
 * Despite the name, the return value is whatever pack() produces, not an
 * integer.
 *
 * @param mixed $float value to encode
 * @return string packed bytes
 */
public static function floatToIntBits($float)
{
    return pack('g', (float) $float);
}

/**
 * Casts $double to double and returns the result of pack() with format 'e'
 * (little-endian double in PHP's pack format table).
 *
 * Despite the name, the return value is whatever pack() produces, not an
 * integer.
 *
 * @param mixed $double value to encode
 * @return string packed bytes
 */
public static function doubleToLongBits($double)
{
    return pack('e', (double) $double);
}
JavaScript
/**
 * Writes `f` at the tap's current position using `writeFloatLE`.
 *
 * The position is advanced by 4 before the bounds check, so it moves forward
 * even when nothing is written. If the new position is past the end of the
 * buffer, the function returns undefined without writing.
 *
 * @param {number} f Value to write.
 * @returns The return value of `writeFloatLE`, or undefined when skipped.
 */
Tap.prototype.writeFloat = function (f) {
  var buf = this.buf;
  var pos = this.pos; // start offset for this write
  this.pos += 4;
  if (this.pos > buf.length) {
    return;
  }
  return this.buf.writeFloatLE(f, pos);
};

/**
 * Writes `d` at the tap's current position using `writeDoubleLE`.
 *
 * The position is advanced by 8 before the bounds check, so it moves forward
 * even when nothing is written. If the new position is past the end of the
 * buffer, the function returns undefined without writing.
 *
 * @param {number} d Value to write.
 * @returns The return value of `writeDoubleLE`, or undefined when skipped.
 */
Tap.prototype.writeDouble = function (d) {
  var buf = this.buf;
  var pos = this.pos; // start offset for this write
  this.pos += 8;
  if (this.pos > buf.length) {
    return;
  }
  return this.buf.writeDoubleLE(d, pos);
};
Attachments
Issue Links
- links to