Write your own implementation of math's floor function, C

问题

I was thinking about the floor function available in math.h. It is very easy to use it:

#include <stdio.h>
#include <math.h>

int main(void)
{
  for (double a = 12.5; a < 13.4; a += 0.1)
    printf("floor of  %.1lf is  %.1lf\n", a, floor(a));
  return 0;
}

What if I would like to write my own implementation of it? Would it look simply like this:

#include <stdio.h>
#include <math.h>

double my_floor(double num)
{
    return (int)num;
}

int main(void)
{
    double a;

    for (a = 12.5; a < 13.4; a += 0.1)
        printf("floor of  %.1lf is  %.1lf\n", a, floor(a));

    printf("\n\n");

    for (a = 12.5; a < 13.4; a += 0.1)
        printf("floor of  %.1lf is  %.1lf\n", a, my_floor(a));

    return 0;
}

It seems it does not work with negative numbers (my_floor), but the second one seems to be fine (my_floor_2):

#include <stdio.h>
#include <math.h>

double my_floor(double num)
{
    return (int)num;
}

double my_floor_2(double num)
{
    if(num < 0)
        return (int)num - 1;
    else
        return (int)num;
}

int main(void)
{
    double a1 = -12.5;

    printf("%lf\n", floor(a1));
    printf("%lf\n", my_floor(a1));
    printf("%lf\n", my_floor_2(a1));

    return 0;
}

program output:

Is one of them eventually correct or not?

回答1:

No you can't tackle it this way. The best way of writing your own implementation is to steal the one from the C Standard Library on your platform. But note that might contain platform specific nuances so might not be portable.

The C Standard Library floor function is typically clever in that it doesn't work by taking a conversion to an integral type. If it did then you'd run the risk of signed integer overflow, the behaviour of which is undefined. (Note that the smallest possible range for an int is -32767 to +32767).

The precise implementation is also dependent on the floating point scheme used on your platform.

For a platform using IEEE754 floating point, and a long long type you could adopt this scheme:

If the magnitude of the number is greater than the 53rd power of 2, return it back (as it's already integral).
Else, cast to a 64 bit type (long long), and return it back.

回答2:

Both of your attempts have limitations:

If the double value is outside the range of the int type, converting to int is implementation defined.
If the double value is negative but integral, returning (int)num - 1 is incorrect.

Here is an (almost) portable version that tries to handle all cases:

double my_floor_2(double num) {
    if (num >= LLONG_MAX || num <= LLONG_MIN || num != num) {
        /* handle large values, infinities and nan */
        return num;
    }
    long long n = (long long)num;
    double d = (double)n;
    if (d == num || num >= 0)
        return d;
    else
        return d - 1;
}

It should be correct if type long long has more value bits than type double, which is the case on most modern systems.

回答3:

In C++ and 32 bit arithmetics it can be done for example like this:

//---------------------------------------------------------------------------
// IEEE 754 double MSW masks
const DWORD _f64_sig    =0x80000000;    // sign
const DWORD _f64_exp    =0x7FF00000;    // exponent
const DWORD _f64_exp_sig=0x40000000;    // exponent sign
const DWORD _f64_exp_bia=0x3FF00000;    // exponent bias
const DWORD _f64_exp_lsb=0x00100000;    // exponent LSB
const DWORD _f64_exp_pos=        20;    // exponent LSB bit position
const DWORD _f64_man    =0x000FFFFF;    // mantisa
const DWORD _f64_man_msb=0x00080000;    // mantisa MSB
const DWORD _f64_man_bits=       52;    // mantisa bits
// IEEE 754 single masks
const DWORD _f32_sig    =0x80000000;    // sign
const DWORD _f32_exp    =0x7F800000;    // exponent
const DWORD _f32_exp_sig=0x40000000;    // exponent sign
const DWORD _f32_exp_bia=0x3F800000;    // exponent bias
const DWORD _f32_exp_lsb=0x00800000;    // exponent LSB
const DWORD _f32_exp_pos=        23;    // exponent LSB bit position
const DWORD _f32_man    =0x007FFFFF;    // mantisa
const DWORD _f32_man_msb=0x00400000;    // mantisa MSB
const DWORD _f32_man_bits=       23;    // mantisa bits
//---------------------------------------------------------------------------
double f64_floor(double x)
    {
    const int h=1;      // may be platform dependent MSB/LSB order
    const int l=0;
    union _f64          // semi result
        {
        double f;       // 64bit floating point
        DWORD u[2];     // 2x32 bit uint
        } y;
    DWORD m,a;
    int sig,exp,sh;
    y.f=x;
    // extract sign
    sig =y.u[h]&_f64_sig;
    // extract exponent
    exp =((y.u[h]&_f64_exp)>>_f64_exp_pos)-(_f64_exp_bia>>_f64_exp_pos);
    // floor bit shift
    sh=_f64_man_bits-exp; a=0;
    if (exp<0)
        {
        a=y.u[l]|(y.u[h]&_f64_man);
        if (sig) return -1.0;
        return 0.0;
        }
    // LSW
    if (sh>0)
        {
        if (sh<32) m=(0xFFFFFFFF>>sh)<<sh; else m=0;
        a=y.u[l]&(m^0xFFFFFFFF); y.u[l]&=m;
        }
    // MSW
    sh-=32;
    if (sh>0)
        {
        if (sh<_f64_exp_pos) m=(0xFFFFFFFF>>sh)<<sh; else m=_f64_sig|_f64_exp;
        a|=y.u[h]&(m^0xFFFFFFFF); y.u[h]&=m;
        }
    if ((sig)&&(a)) y.f--;
    return y.f;
    }
//---------------------------------------------------------------------------
float f32_floor(float x)
    {
    union               // semi result
        {
        float f;        // 32bit floating point
        DWORD u;        // 32 bit uint
        } y;
    DWORD m,a;
    int sig,exp,sh;
    y.f=x;
    // extract sign
    sig =y.u&_f32_sig;
    // extract exponent
    exp =((y.u&_f32_exp)>>_f32_exp_pos)-(_f32_exp_bia>>_f32_exp_pos);
    // floor bit shift
    sh=_f32_man_bits-exp; a=0;
    if (exp<0)
        {
        a=y.u&_f32_man;
        if (sig) return -1.0;
        return 0.0;
        }
    if (sh>0)
        {
        if (sh<_f32_exp_pos) m=(0xFFFFFFFF>>sh)<<sh; else m=_f32_sig|_f32_exp;
        a|=y.u&(m^0xFFFFFFFF); y.u&=m;
        }
    if ((sig)&&(a)) y.f--;
    return y.f;
    }
//---------------------------------------------------------------------------

The point is to make mask that will clear out the decimal bits from mantissa and in case of negative input and non zero cleared bits decrement the result. To access individual bits you can convert your floating point value to integral representation with use of union (like in the example) or use pointers instead.

I tested this in simple VCL app like this:

float f32;
double f64;
AnsiString txt="";
// 64 bit
txt+="[double]\r\n";
for (f64=-10.0;f64<=10.0;f64+=0.1)
 if (fabs(floor(f64)-f64_floor(f64))>1e-6)
    {
    txt+=AnsiString().sprintf("%5.3lf %5.3lf %5.3lf\r\n",f64,floor(f64),f64_floor(f64));
    f64_floor(f64);
    }
for (f64=1;f64<=1e307;f64*=1.1)
    {
    if (fabs(floor( f64)-f64_floor( f64))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n", f64,floor( f64),f64_floor( f64));
    f64_floor( f64); }
    if (fabs(floor(-f64)-f64_floor(-f64))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n",-f64,floor(-f64),f64_floor(-f64));
    f64_floor(-f64); }
    }
// 32 bit
txt+="[float]\r\n";
for (f32=-10.0;f32<=10.0;f32+=0.1)
 if (fabs(floor(f32)-f32_floor(f32))>1e-6)
    {
    txt+=AnsiString().sprintf("%5.3lf %5.3lf %5.3lf\r\n",f32,floor(f32),f32_floor(f32));
    f32_floor(f32);
    }
for (f32=1;f32<=1e37;f32*=1.1)
    {
    if (fabs(floor( f32)-f32_floor( f32))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n", f32,floor( f32),f32_floor( f32));
    f32_floor( f32); }
    if (fabs(floor(-f32)-f32_floor(-f32))>1e-6) { txt+=AnsiString().sprintf("%lf lf lf\r\n",-f32,floor(-f32),f32_floor(-f32));
    f32_floor(-f32); }
    }
mm_log->Lines->Add(txt);

with no difference result (so in all tested cases it matches math.h floor() values. If you want to give this a shot outside VCL then just change AnsiString to any string type you got at hand and change the output from TMemo::mm_log to anything you got (like console cout or whatever)

The double calling of fxx_floor() in case of difference is for debuging purposes (you can place a breakpoint and step in the error case directly).

[Notes]

Beware the order of words (MSW,LSW) is platform dependent so you should adjust the h,l constants accordingly. This code is not optimized so it is easily understandable so do not expect it will be fast.

来源：https://stackoverflow.com/questions/41856771/write-your-own-implementation-of-maths-floor-function-c

标签

math

floor