How do I handle byte order differences when reading/writing floating-point types in C?

后端未结

关注

 6  1682

I\'m devising a file format for my application, and I\'d obviously like for it to work on both big-endian and little-endian systems. I\'ve already found working solutions fo

相关标签:

6条回答

无人共我

2020-12-03 12:27

Another option could be to use double frexp(double value, int *exp); from <math.h> (C99) to break down the floating-point value into a normalized fraction (in the range [0.5, 1)) and an integral power of 2. You can then multiply the fraction by FLT_RADIX^DBL_MANT_DIG to get an integer in the range [FLT_RADIX^DBL_MANT_DIG/2, FLT_RADIX^DBL_MANT_DIG). Then you save both integers big- or little-endian, whichever you choose in your format.

When you load a saved number, you do the reverse operation and use double ldexp(double x, int exp); to multiply the reconstructed fraction by the power of 2.

This will work best when FLT_RADIX=2 (virtually all systems, I suppose?) and DBL_MANT_DIG<=64.

Care must be taken to avoid overflows.

Sample code for doubles:

#include <limits.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include <stdio.h>

#if CHAR_BIT != 8
#error currently supported only CHAR_BIT = 8
#endif

#if FLT_RADIX != 2
#error currently supported only FLT_RADIX = 2
#endif

#ifndef M_PI
#define M_PI 3.14159265358979324
#endif

typedef unsigned char uint8;

/*
  10-byte little-endian serialized format for double:
  - normalized mantissa stored as 64-bit (8-byte) signed integer:
      negative range: (-2^53, -2^52]
      zero: 0
      positive range: [+2^52, +2^53)
  - 16-bit (2-byte) signed exponent:
      range: [-0x7FFE, +0x7FFE]

  Represented value = mantissa * 2^(exponent - 53)

  Special cases:
  - +infinity: mantissa = 0x7FFFFFFFFFFFFFFF, exp = 0x7FFF
  - -infinity: mantissa = 0x8000000000000000, exp = 0x7FFF
  - NaN:       mantissa = 0x0000000000000000, exp = 0x7FFF
  - +/-0:      only one zero supported
*/

void Double2Bytes(uint8 buf[10], double x)
{
  double m;
  long long im; // at least 64 bits
  int ie;
  int i;

  if (isnan(x))
  {
    // NaN
    memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\xFF\x7F", 10);
    return;
  }
  else if (isinf(x))
  {
    if (signbit(x))
      // -inf
      memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10);
    else
      // +inf
      memcpy(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10);
    return;
  }

  // Split double into normalized mantissa (range: (-1, -0.5], 0, [+0.5, +1))
  // and base-2 exponent
  m = frexp(x, &ie); // x = m * 2^ie exactly for FLT_RADIX=2
                     // frexp() can't fail
  // Extract most significant 53 bits of mantissa as integer
  m = ldexp(m, 53); // can't overflow because
                    // DBL_MAX_10_EXP >= 37 equivalent to DBL_MAX_2_EXP >= 122
  im = trunc(m); // exact unless DBL_MANT_DIG > 53

  // If the exponent is too small or too big, reduce the number to 0 or
  // +/- infinity
  if (ie > 0x7FFE)
  {
    if (im < 0)
      // -inf
      memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10);
    else
      // +inf
      memcpy(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10);
    return;
  }
  else if (ie < -0x7FFE)
  {
    // 0
    memcpy(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\x00\x00", 10);
    return;
  }

  // Store im as signed 64-bit little-endian integer
  for (i = 0; i < 8; i++, im >>= 8)
    buf[i] = (uint8)im;

  // Store ie as signed 16-bit little-endian integer
  for (i = 8; i < 10; i++, ie >>= 8)
    buf[i] = (uint8)ie;
}

void Bytes2Double(double* x, const uint8 buf[10])
{
  unsigned long long uim; // at least 64 bits
  long long im; // ditto
  unsigned uie;
  int ie;
  double m;
  int i;
  int negative = 0;
  int maxe;

  if (!memcmp(buf, "\x00\x00\x00\x00\x00\x00\x00\x00" "\xFF\x7F", 10))
  {
#ifdef NAN
    *x = NAN;
#else
    *x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
    return;
  }

  if (!memcmp(buf, "\x00\x00\x00\x00\x00\x00\x00\x80" "\xFF\x7F", 10))
  {
    *x = -INFINITY;
    return;
  }
  else if (!memcmp(buf, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F" "\xFF\x7F", 10))
  {
    *x = INFINITY;
    return;
  }

  // Load im as signed 64-bit little-endian integer
  uim = 0;
  for (i = 0; i < 8; i++)
  {
    uim >>= 8;
    uim |= (unsigned long long)buf[i] << (64 - 8);
  }
  if (uim <= 0x7FFFFFFFFFFFFFFFLL)
    im = uim;
  else
    im = (long long)(uim - 0x7FFFFFFFFFFFFFFFLL - 1) - 0x7FFFFFFFFFFFFFFFLL - 1;

  // Obtain the absolute value of the mantissa, make sure it's
  // normalized and fits into 53 bits, else the input is invalid
  if (im > 0)
  {
    if (im < (1LL << 52) || im >= (1LL << 53))
    {
#ifdef NAN
      *x = NAN;
#else
      *x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
      return;
    }
  }
  else if (im < 0)
  {
    if (im > -(1LL << 52) || im <= -(1LL << 53))
    {
#ifdef NAN
      *x = NAN;
#else
      *x = 0; // NaN is not supported, use 0 instead (we could return an error)
#endif
      return;
    }
    negative = 1;
    im = -im;
  }

  // Load ie as signed 16-bit little-endian integer
  uie = 0;
  for (i = 8; i < 10; i++)
  {
    uie >>= 8;
    uie |= (unsigned)buf[i] << (16 - 8);
  }
  if (uie <= 0x7FFF)
    ie = uie;
  else
    ie = (int)(uie - 0x7FFF - 1) - 0x7FFF - 1;

  // If DBL_MANT_DIG < 53, truncate the mantissa
  im >>= (53 > DBL_MANT_DIG) ? (53 - DBL_MANT_DIG) : 0;

  m = im;
  m = ldexp(m, (53 > DBL_MANT_DIG) ? -DBL_MANT_DIG : -53); // can't overflow
           // because DBL_MAX_10_EXP >= 37 equivalent to DBL_MAX_2_EXP >= 122

  // Find out the maximum base-2 exponent and
  // if ours is greater, return +/- infinity
  frexp(DBL_MAX, &maxe);
  if (ie > maxe)
    m = INFINITY;
  else
    m = ldexp(m, ie); // underflow may cause a floating-point exception

  *x = negative ? -m : m;
}

int test(double x, const char* name)
{
  uint8 buf[10], buf2[10];
  double x2;
  int error1, error2;

  Double2Bytes(buf, x);
  Bytes2Double(&x2, buf);
  Double2Bytes(buf2, x2);

  printf("%+.15E '%s' -> %02X %02X %02X %02X %02X %02X %02X %02X  %02X %02X\n",
         x,
         name,
         buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7],buf[8],buf[9]);

  if ((error1 = memcmp(&x, &x2, sizeof(x))) != 0)
    puts("Bytes2Double(Double2Bytes(x)) != x");

  if ((error2 = memcmp(buf, buf2, sizeof(buf))) != 0)
    puts("Double2Bytes(Bytes2Double(Double2Bytes(x))) != Double2Bytes(x)");

  puts("");

  return error1 || error2;
}

int testInf(void)
{
  uint8 buf[10];
  double x, x2;
  int error;

  x = DBL_MAX;
  Double2Bytes(buf, x);
  if (!++buf[8])
    ++buf[9]; // increment the exponent beyond the maximum
  Bytes2Double(&x2, buf);

  printf("%02X %02X %02X %02X %02X %02X %02X %02X  %02X %02X -> %+.15E\n",
         buf[0],buf[1],buf[2],buf[3],buf[4],buf[5],buf[6],buf[7],buf[8],buf[9],
         x2);

  if ((error = !isinf(x2)) != 0)
    puts("Bytes2Double(Double2Bytes(DBL_MAX) * 2) != INF");

  puts("");

  return error;
}

#define VALUE_AND_NAME(V) { V, #V }

const struct
{
  double value;
  const char* name;
} testData[] =
{
#ifdef NAN
  VALUE_AND_NAME(NAN),
#endif
  VALUE_AND_NAME(0.0),
  VALUE_AND_NAME(+DBL_MIN),
  VALUE_AND_NAME(-DBL_MIN),
  VALUE_AND_NAME(+1.0),
  VALUE_AND_NAME(-1.0),
  VALUE_AND_NAME(+M_PI),
  VALUE_AND_NAME(-M_PI),
  VALUE_AND_NAME(+DBL_MAX),
  VALUE_AND_NAME(-DBL_MAX),
  VALUE_AND_NAME(+INFINITY),
  VALUE_AND_NAME(-INFINITY),
};

int main(void)
{
  unsigned i;
  int errors = 0;

  for (i = 0; i < sizeof(testData) / sizeof(testData[0]); i++)
    errors += test(testData[i].value, testData[i].name);

  errors += testInf();

  // Test subnormal values. A floating-point exception may be raised.
  errors += test(+DBL_MIN / 2, "+DBL_MIN / 2");
  errors += test(-DBL_MIN / 2, "-DBL_MIN / 2");

  printf("%d error(s)\n", errors);

  return 0;
}

Output (ideone):

+NAN 'NAN' -> 00 00 00 00 00 00 00 00  FF 7F

+0.000000000000000E+00 '0.0' -> 00 00 00 00 00 00 00 00  00 00

+2.225073858507201E-308 '+DBL_MIN' -> 00 00 00 00 00 00 10 00  03 FC

-2.225073858507201E-308 '-DBL_MIN' -> 00 00 00 00 00 00 F0 FF  03 FC

+1.000000000000000E+00 '+1.0' -> 00 00 00 00 00 00 10 00  01 00

-1.000000000000000E+00 '-1.0' -> 00 00 00 00 00 00 F0 FF  01 00

+3.141592653589793E+00 '+M_PI' -> 18 2D 44 54 FB 21 19 00  02 00

-3.141592653589793E+00 '-M_PI' -> E8 D2 BB AB 04 DE E6 FF  02 00

+1.797693134862316E+308 '+DBL_MAX' -> FF FF FF FF FF FF 1F 00  00 04

-1.797693134862316E+308 '-DBL_MAX' -> 01 00 00 00 00 00 E0 FF  00 04

+INF '+INFINITY' -> FF FF FF FF FF FF FF 7F  FF 7F

-INF '-INFINITY' -> 00 00 00 00 00 00 00 80  FF 7F

FF FF FF FF FF FF 1F 00  01 04 -> +INF

+1.112536929253601E-308 '+DBL_MIN / 2' -> 00 00 00 00 00 00 10 00  02 FC

-1.112536929253601E-308 '-DBL_MIN / 2' -> 00 00 00 00 00 00 F0 FF  02 FC

0 error(s)

0 讨论(0)

清歌不尽

2020-12-03 12:35

XML is probably the most portable way to do it.

However, it appears that you already have most of the parser built, but are stuck on the float/double issue. I would suggest writing it out as a string (to whatever precision you desire) and then reading that back in.

Unless all your target platforms use IEEE-754 floats (and doubles), no byte-swapping tricks will work for you.

0 讨论(0)
发布评论:

提交评论
- 加载中...
故里飘歌

2020-12-03 12:37

Depending on the application it could be a good idea to use a plain text data format (a possibility being XML). If you don't want to waste disk space you can compress it.

0 讨论(0)
发布评论:

提交评论
- 加载中...
梦如初夏

2020-12-03 12:37

A library like HDF5 or even NetCDF is probably a bit heavyweight for this as High Performance Mark said, unless you also need the other features available in those libraries.

A lighter-weight alternative that only deals with the serialization would be e.g. XDR (see also wikipedia description). Many OS'es supply XDR routines out of the box, if this is not enough free-standing XDR libraries exist as well.

0 讨论(0)
发布评论:

提交评论
- 加载中...
梦毁少年i

2020-12-03 12:42

If you guarantee that your implementations always treat serialized floating point representations in a specified format, then you will be fine (IEEE 754 is common).

Yes, architectures may order floating point numbers differently (e.g. in big or little endian). Therefore, you will want to somehow specify the endianness. This could be in the format's specification or variable and recorded in the file's data.

The last major pitfall is that alignment for builtins may vary. How your hardware/processor handles malaligned data is implementation defined. So you may need to swap the data/bytes, then move it to the destination float/double.

0 讨论(0)
发布评论:

提交评论
- 加载中...
春和景丽

2020-12-03 12:46
Floating point values use the same byte order as integral values imho. Use an union to overlap them with the respective integral counterpart and use the common hton functions:
```
float htonf(float x) {
   union foo {
     float f;
     uint32_t i;
   } foo = { .f = x };

   foo.i = htonl(foo.i);
   return foo.f;
}
```
0 讨论(0)
发布评论:

提交评论
- 加载中...