64 bit mathematical operations without any loss of data or precision

问题

I believe there isn't any portable standard data type for 128 bits of data. So, my question is about how efficiently 64 bit operations can be carried out without loss of data using existing standard data-types.

For example: I have the following two uint64_t variables:

// Converting -1 to an unsigned 64-bit type yields the largest representable
// value, 2^64 - 1 (0xFFFFFFFFFFFFFFFF).
uint64_t x = -1;
uint64_t y = -1;

Now, how can the results of mathematical operations such as x+y, x-y, x*y and x/y be stored, retrieved, and printed?

For the above variables, x+y wraps around to 0xFFFFFFFFFFFFFFFEULL (which reads as -2 when interpreted as a signed value) with a carry of 1.

// Adds two unsigned 64-bit values without losing the carry.
//
// a, b         operands
// result_high  receives the carry out of bit 63 (0 or 1)
// result_low   receives the low 64 bits of the sum
//
// The results are returned through reference parameters; passing them by
// value would discard the computed sum when the function returns.
void add (uint64_t a, uint64_t b, uint64_t& result_high, uint64_t& result_low)
{
    // Unsigned addition wraps modulo 2^64.
    result_low  = a + b;
    // A wrapped sum is always smaller than either operand, so this detects the carry.
    result_high = (result_low < a) ? 1 : 0;
}

How can the other operations be performed in the same way as add, so that they give the correct final output?

I'd appreciate it if someone could share a generic algorithm that takes care of the overflow, underflow, and similar conditions that might come into the picture with such operations.

Any standard, tested algorithms that might help would be welcome.

回答1:

There are a lot of BigInteger libraries out there for manipulating big numbers.

GMP Library
C++ Big Integer Library

If you want to avoid integrating a library and your requirements are modest, here is my basic BigInteger snippet that I generally use for problems with basic requirements. You can add new methods or overload more operators according to your needs. This snippet is widely tested and bug free.

Source

// Arbitrary-precision signed integer stored as decimal digits.
//
// Representation: `a` holds the magnitude as ASCII digits, least-significant
// digit first, without superfluous high-order zeros (zero is the single digit
// "0"). `sign` is -1 for negative values and 1 otherwise; zero always has
// sign 1, so every value has exactly one representation and operator== can
// compare the members directly.
//
// Input strings are NOT validated: they must consist of decimal digits with
// an optional leading '-'.
//
// No destructor or copy operations are declared on purpose: member-wise
// copy and destruction are correct for this class.
class BigInt {
public:
    // default constructor: the value zero
    BigInt() : a("0"), sign(1) {}

    // constructor for string; intentionally implicit so that strings can be
    // used directly as operands (e.g. x + std::string("5"))
    BigInt( std::string b ) : a("0"), sign(1) {
        (*this) = b;
    }

    // some helpful methods
    std::string::size_type size() const { // returns number of digits
        return a.length();
    }
    BigInt inverseSign() { // negates this value in place and returns a copy
        if( !isZero() ) sign = -sign; // zero stays non-negative, never "-0"
        return (*this);
    }
    BigInt normalize( int newSign ) { // removes leading 0, fixes sign; returns a copy
        canonicalize( newSign );
        return (*this);
    }

    // assignment operator
    void operator = ( std::string b ) { // assigns a string to BigInt
        const bool negative = !b.empty() && b[0] == '-';
        // Copy the digits in reverse (least-significant first), skipping the
        // leading '-' when present. An empty digit sequence becomes zero.
        a.assign( b.rbegin(), negative ? b.rend() - 1 : b.rend() );
        canonicalize( negative ? -1 : 1 );
    }

    // conditional operators
    bool operator < (BigInt const& b) const { // less than operator
        if( sign != b.sign ) return sign < b.sign;
        const int order = compareMagnitudes( a, b.a );
        // For two negative values the larger magnitude is the smaller number.
        return sign == 1 ? order < 0 : order > 0;
    }
    bool operator == ( const BigInt &b ) const { // operator for equality
        return a == b.a && sign == b.sign;
    }

    // mathematical operators (none of them modifies the left operand)
    BigInt operator + ( BigInt const& b ) const { // addition
        if( sign == b.sign )
            return fromMagnitude( addMagnitudes( a, b.a ), sign );
        // Opposite signs: subtract the smaller magnitude from the larger one;
        // the result takes the sign of the operand with the larger magnitude.
        const int order = compareMagnitudes( a, b.a );
        if( order == 0 ) return BigInt();
        return order > 0 ? fromMagnitude( subtractMagnitudes( a, b.a ), sign )
                         : fromMagnitude( subtractMagnitudes( b.a, a ), b.sign );
    }
    BigInt operator - ( BigInt b ) const { // subtraction: x - y == x + (-y)
        return (*this) + b.inverseSign(); // b is a by-value copy, safe to negate
    }
    BigInt operator * ( BigInt const& b ) const { // schoolbook multiplication
        std::string product( a.size() + b.a.size(), '0' );
        for( std::string::size_type i = 0; i < a.size(); ++i ) {
            const int digit = a[i] - '0';
            int carry = 0;
            for( std::string::size_type j = 0; j < b.a.size(); ++j ) {
                // At most 9 + 9*9 + 9 = 99, so the carry never exceeds 9.
                const int cell = ( product[i + j] - '0' ) + digit * ( b.a[j] - '0' ) + carry;
                product[i + j] = static_cast<char>( '0' + cell % 10 );
                carry = cell / 10;
            }
            // This position has not been written by any earlier row.
            product[i + b.a.size()] = static_cast<char>( '0' + carry );
        }
        return fromMagnitude( product, sign * b.sign );
    }
    BigInt operator / ( BigInt b ) const { // division, truncating toward zero
        // Division by zero deliberately performs an integer division by zero,
        // exactly as the original snippet did.
        // NOTE(review): this is undefined behaviour (typically a hardware
        // trap); consider throwing std::domain_error once <stdexcept> is
        // available in this file.
        if( b.isZero() ) b.a[0] /= ( b.a[0] - '0' );
        std::string quotient, remainder;
        divideMagnitudes( a, b.a, quotient, remainder );
        return fromMagnitude( quotient, sign * b.sign );
    }
    BigInt operator % ( BigInt b ) const { // remainder, takes the sign of the dividend
        // Same deliberate divide-by-zero trap as operator / (see note there).
        if( b.isZero() ) b.a[0] /= ( b.a[0] - '0' );
        std::string quotient, remainder;
        divideMagnitudes( a, b.a, quotient, remainder );
        return fromMagnitude( remainder, sign );
    }

    // << operator overloading
    friend std::ostream& operator << (std::ostream&, BigInt const&);

private:
    // true when the value is the canonical zero
    bool isZero() const {
        return a.size() == 1 && a[0] == '0';
    }

    // drops high-order zeros from a reversed digit string; empty or all-zero input becomes "0"
    static void trim( std::string& digits ) {
        const std::string::size_type top = digits.find_last_not_of( '0' );
        if( top == std::string::npos ) digits = "0";
        else digits.erase( top + 1 );
    }

    // restores the class invariant: trimmed digits, and sign 1 for zero
    void canonicalize( int newSign ) {
        trim( a );
        sign = isZero() ? 1 : newSign;
    }

    // builds a canonical BigInt from a reversed digit string and a sign
    static BigInt fromMagnitude( std::string digits, int resultSign ) {
        BigInt result;
        result.a.swap( digits );
        result.canonicalize( resultSign );
        return result;
    }

    // three-way comparison of two trimmed magnitudes: negative, zero or positive
    static int compareMagnitudes( std::string const& x, std::string const& y ) {
        if( x.size() != y.size() ) return x.size() < y.size() ? -1 : 1;
        for( std::string::size_type i = x.size(); i-- > 0; ) // most significant digit first
            if( x[i] != y[i] ) return x[i] < y[i] ? -1 : 1;
        return 0;
    }

    // returns x + y for two magnitudes
    static std::string addMagnitudes( std::string const& x, std::string const& y ) {
        std::string sum;
        int carry = 0;
        for( std::string::size_type i = 0; i < x.size() || i < y.size() || carry; ++i ) {
            if( i < x.size() ) carry += x[i] - '0';
            if( i < y.size() ) carry += y[i] - '0';
            sum += static_cast<char>( '0' + carry % 10 );
            carry /= 10;
        }
        trim( sum );
        return sum;
    }

    // returns x - y for two magnitudes; requires x >= y
    static std::string subtractMagnitudes( std::string const& x, std::string const& y ) {
        std::string difference;
        int borrow = 0;
        for( std::string::size_type i = 0; i < x.size(); ++i ) {
            int digit = ( x[i] - '0' ) - borrow - ( i < y.size() ? y[i] - '0' : 0 );
            borrow = digit < 0 ? 1 : 0;
            if( borrow ) digit += 10;
            difference += static_cast<char>( '0' + digit );
        }
        trim( difference );
        return difference;
    }

    // long division of magnitudes; divisor must be non-zero.
    // quotient may carry high-order zeros, remainder is trimmed.
    static void divideMagnitudes( std::string const& dividend, std::string const& divisor,
                                  std::string& quotient, std::string& remainder ) {
        quotient.assign( dividend.size(), '0' );
        remainder = "0";
        for( std::string::size_type i = dividend.size(); i-- > 0; ) {
            // remainder = remainder * 10 + next dividend digit
            remainder.insert( remainder.begin(), dividend[i] );
            trim( remainder );
            // Runs at most 9 times because remainder < 10 * divisor here.
            while( compareMagnitudes( remainder, divisor ) >= 0 ) {
                remainder = subtractMagnitudes( remainder, divisor );
                ++quotient[i];
            }
        }
    }

    // representations and structures
    std::string a; // to store the digits, least-significant first
    int sign; // sign = -1 for negative numbers, sign = 1 otherwise
};

// Writes the decimal representation of obj to os: a leading "-" for negative
// values, followed by the stored digits from the last position down to the
// first (the digits are kept in reverse order).
ostream& operator << (ostream& os, BigInt const& obj) {
    if( obj.sign == -1 ) {
        os << "-";
    }
    for( int remaining = obj.a.size(); remaining > 0; --remaining ) {
        os << obj.a[remaining - 1];
    }
    return os;
}

Usage

BigInt a, b, c;
a = BigInt("1233423523546745312464532");
// BigInt does not validate its input, so the literal must contain decimal
// digits only (plus an optional leading '-'); a stray letter would silently
// be treated as a digit and corrupt the result.
b = BigInt("45624565434216345657652454352");
c = a + b;
// c = a * b;
// c = b / a;
// c = b - a;
// c = b % a;
cout << c << endl;

// dynamic memory allocation
BigInt *obj = new BigInt("123");
delete obj;

回答2:

You can emulate uint128_t if you don't have it:

/* Emulated unsigned 128-bit integer made of two 64-bit halves. */
typedef struct uint128_t {
    uint64_t lo; /* low 64 bits */
    uint64_t hi; /* high 64 bits */
} uint128_t;
...

/* Returns the full 128-bit sum of two unsigned 64-bit values. */
uint128_t add (uint64_t a, uint64_t b) {
    uint128_t sum;
    sum.lo = a + b; /* wraps modulo 2^64 */
    sum.hi = 0;
    /* A wrapped sum is smaller than either operand: that is the carry. */
    if (sum.lo < a) {
        sum.hi = 1;
    }
    return sum;
}

/* Returns a - b as a 128-bit two's-complement value: when b > a the high
   word is all ones. */
uint128_t sub (uint64_t a, uint64_t b) {
    uint128_t diff;
    diff.lo = a - b; /* wraps modulo 2^64 */
    diff.hi = 0;
    /* A wrapped difference is larger than the minuend: that is the borrow. */
    if (diff.lo > a) {
        diff.hi -= 1; /* unsigned wrap-around sets every bit */
    }
    return diff;
}

Multiplication without inbuilt compiler or assembler support is a bit more difficult to get right. Essentially, you need to split both multiplicands into hi:lo unsigned 32-bit, and perform 'long multiplication' taking care of carries and 'columns' between the partial 64-bit products.

Divide and modulo return 64 bit results given 64 bit arguments - so that's not an issue as you have defined the problem. Dividing 128 bit by 64 or 128 bit operands is a much more complicated operation, requiring normalization, etc.

longlong.h routines umul_ppmm and udiv_qrnnd in GMP give the 'elementary' steps for multiple-precision/limb operations.

回答3:

Most modern GCC versions support the __int128 type, which can hold a 128-bit integer.

Example,

// Adds two 128-bit integers using the compiler's built-in __int128 type.
__int128 add(__int128 a, __int128 b){
    const __int128 sum = a + b;
    return sum;
}

来源：https://stackoverflow.com/questions/24969626/64-bit-mathematical-operations-without-any-loss-of-data-or-precision

标签

algorithm

math

operators

arithmetic-expressions