I've got four unsigned 32-bit integers representing an unsigned 128-bit integer, in little-endian order:
/* Unsigned 128-bit integer stored as four unsigned words in little-endian
   order: part[0] is the least significant word, part[3] the most significant. */
typedef struct {
unsigned int part[4];
} bigint;
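Use lookup tables indexed by 8 bits. You can have 4 lookup tables of 256 entries each, where each entry stores the decimal digits of its value, one digit per byte. The first table covers the values 0-255 for the least significant byte, the second table holds the first table's values multiplied by 256, and so on.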
So when you need to convert your number, sum up the entries from the lookup tables. When adding, you can add them as plain binary numbers and later make one pass over each byte to fix the overflows.
Example: for the number 0x12345678, the first lookup table has, at address 0x78 (= 120), the value 0x010200. In the second table, at 0x56 (= 86), there is 0x0202000106 (0x5600 in decimal is 22016). In the third table you would have 0x03040007080702, and in the last table, at 0x12, you have 0x030001090809080808 (this does not fit in 32-bit arithmetic, but you already know that).
Then sum up these numbers (as binary numbers) and make one pass, byte by byte, to handle the overflow. The code in the for loop is something like:
s=carry+val[i];
val[i]=val[i]&10
carry=s/10;
//you can put last two operations in table
If we count the operations needed for this:
1. (Looking up in the tables and adding) 4 table lookups and 16 additions (keep in mind that you do not need to care about overflow here, because it cannot occur).
2. One pass with 3 operations in each step, and 16 steps in the pass.
Pessimistic upper bound: 6*16 = 96, i.e. roughly 100 operations.
EDIT:
Here is the C++ code; it is 30% faster than the naive implementation.
#include <cstdint>
#include <iostream>
// Digit tables filled by make_lu(): lu[j][i] = lookup_value(i * 256^j), i.e. the
// decimal digits (one per byte) of byte value i placed at byte position j.
static uint64_t lu[4][256];
// Spreads the decimal digits of n over the bytes of the result: the least
// significant digit goes into the lowest byte, the next digit into the byte
// above it, and so on (e.g. 120 -> 0x010200).
//
// Only 8 digits fit into the 64-bit result. The place value wraps to zero after
// the eighth digit, so any higher digits of n contribute nothing.
constexpr uint64_t lookup_value(uint64_t n) {
  uint64_t packed = 0;
  // `place` is 256^k for the k-th digit; multiplying (rather than shifting)
  // keeps the arithmetic well defined once it wraps past 64 bits.
  for (uint64_t place = 1; n != 0; n /= 10, place *= 256) {
    packed += (n % 10) * place;
  }
  return packed;
}
// Fills lu: for every byte position (0 = least significant) and every byte
// value, stores the digit-per-byte form of value * 256^position.
//
// NOTE(review): lookup_value() keeps only 8 decimal digits, so the entries
// lu[3][6..255] (values >= 100,000,000) are stored without their high digits.
void make_lu() {
  for (int pos = 0; pos < 4; ++pos) {
    const int shift = 8 * pos;
    for (int byte = 0; byte < 256; ++byte) {
      // byte << shift equals byte * 256^pos and is at most 255 * 2^24.
      lu[pos][byte] = lookup_value(static_cast<uint64_t>(byte) << shift);
    }
  }
}
// Quotient and remainder of a small value divided by 10.
struct DivMod {
uint8_t div;  // value / 10
uint8_t rem;  // value % 10
};
// dm[v] holds v / 10 and v % 10 for every v in 0..255; filled by make_dm().
static DivMod dm[256];
void make_dm() {
for (int i = 0; i < 256; ++i) {
dm[i].div = i / 10;
dm[i].rem = i % 10;
}
}
// Builds both lookup tables (lu and dm). b2d() reads these tables, so call
// this before the first conversion.
void init() {
make_lu();
make_dm();
}
uint64_t b2d(uint64_t n) {
uint64_t r = 0;
for (int i = 0; i < 4; ++i) {
r += lu[i][(n >> (i * 8)) & 0xff];
}
uint64_t r2 = 0;
uint64_t of = 0;
for (int i = 0; i < 8; ++i) {
uint64_t v = ((r >> (i * 8)) & 0xff) + of;
DivMod &x = dm[v];
of = x.div;
r2 += uint64_t(x.rem) << (i * 8);
}
return r2;
}
// Reads one unsigned integer from standard input, converts it with b2d() and
// prints the result in hexadecimal, so every decimal digit appears as one byte
// (e.g. input 123 prints "10203"; leading zeros are not printed).
// Returns 0 on success, 1 if the input is not a number or is too large.
int main() {
  init();
  uint64_t n = 0;
  if (!(std::cin >> n)) {
    std::cerr << "error: expected an unsigned integer on standard input\n";
    return 1;
  }
  // b2d() stores one decimal digit per byte of a uint64_t, so it can represent
  // at most 8 digits; larger inputs would silently print wrong digits.
  constexpr uint64_t kMaxInput = 99999999;
  if (n > kMaxInput) {
    std::cerr << "error: input must be at most " << kMaxInput << "\n";
    return 1;
  }
  std::cout << std::hex << b2d(n) << "\n";
  return 0;
}