Understanding printf() with integers

问题

I have a question regarding how the printf() method prints integers, signed or unsigned. One day, I found myself thinking about how difficult it must be to convert a binary sequence into a sequence of decimal digits that a human can understand, given that a computer has no concept of decimal.

Below, I have a printf() method (from here) with its associated methods. I've tried to understand as much as I can about how printi() works, as you can see in the comments:

#define PAD_RIGHT 1
#define PAD_ZERO 2

#include <stdarg.h>

/*
 * Emit a single character.
 *
 * When str is non-null, c is stored at the position *str points to and
 * *str is advanced by one.  When str is null, c is written with putchar().
 */
static void printchar(char **str, int c)
{
    extern int putchar(int c);

    if (!str) {
        (void)putchar(c);
        return;
    }

    *(*str)++ = c;
}

/*
 * Emit string through printchar(), padded to at least width characters.
 *
 *   out     destination forwarded to printchar()
 *   string  NUL-terminated text to emit
 *   width   minimum field width; values <= 0 disable padding
 *   pad     flag bits: PAD_RIGHT puts the padding after the text,
 *           PAD_ZERO pads with '0' instead of ' '
 *
 * Returns the number of characters emitted, padding included.
 */
static int prints(char **out, const char *string, int width, int pad)
{
    register int pc = 0, padchar = ' ';

    if (width > 0) {
        register int len = 0;
        register const char *ptr;

        for (ptr = string; *ptr; ++ptr) ++len;

        /* From here on, width holds only the number of pad characters. */
        if (len >= width) width = 0;
        else width -= len;

        /*
         * Zero padding only makes sense in front of the text: trailing
         * zeros would turn "42" into "42000".  As in standard printf, the
         * '0' flag is ignored when '-' (PAD_RIGHT) is also given.
         */
        if ((pad & PAD_ZERO) && !(pad & PAD_RIGHT)) padchar = '0';
    }

    /* Leading padding (right-justified text). */
    if (!(pad & PAD_RIGHT)) {
        for ( ; width > 0; --width) {
            printchar (out, padchar);
            ++pc;
        }
    }

    for ( ; *string ; ++string) {
        printchar (out, *string);
        ++pc;
    }

    /* Trailing padding; width is already 0 if the padding went in front. */
    for ( ; width > 0; --width) {
        printchar (out, padchar);
        ++pc;
    }

    return pc;
}

/* the following should be enough for 32 bit int */
#define PRINT_BUF_LEN 12

/*
 * Convert the integer i to text in base b and emit it through prints().
 *
 *   out      destination forwarded to prints()
 *   i        the number to convert
 *   b        the base, e.g. 10 for decimal, 16 for hexadecimal
 *   sg       non-zero if i is to be treated as signed (%d); zero if its
 *            bits are to be read as unsigned (%u, %x).  The sign is only
 *            honoured when b == 10.
 *   width    minimum field width, 0 for none
 *   pad      PAD_RIGHT / PAD_ZERO flag bits, passed on to prints()
 *   letbase  character used for digit value 10 ('a' or 'A')
 *
 * Returns the number of characters emitted.
 */
static int printi(char **out, int i, int b, int sg, int width, int pad, int letbase)
{
    char print_buf[PRINT_BUF_LEN];
    register char *s;
    register int t, neg = 0, pc = 0;
    register unsigned int u = (unsigned int)i;

    /* Zero is special-cased: the digit loop below emits nothing for u == 0. */
    if (i == 0)
    {
        print_buf[0] = '0';
        print_buf[1] = '\0';
        return prints(out, print_buf, width, pad);
    }

    if (sg && b == 10 && i < 0)
    {
        neg = 1;
        /*
         * Take the magnitude in unsigned arithmetic.  Writing -i would
         * overflow (undefined behaviour) when i is INT_MIN; 0u - u is
         * well defined and yields the correct magnitude for every
         * negative i.
         */
        u = 0u - u;
    }

    /* The digits are produced least-significant first, so the buffer is
       filled from its end towards its start. */
    s = print_buf + PRINT_BUF_LEN - 1;
    *s = '\0';

    while (u)
    {
        t = u % b;

        /* Digit values 10 and up become letters starting at letbase. */
        if (t >= 10)
            t += letbase - '0' - 10;

        *--s = t + '0';
        u /= b;
    }

    if (neg)
    {
        if (width && (pad & PAD_ZERO))
        {
            /* With zero padding the sign must come before the zeros, so
               it is emitted here and taken out of the field width. */
            printchar(out, '-');
            ++pc;
            --width;
        }
        else
            *--s = '-';
    }

    return pc + prints(out, s, width, pad);
}

/*
 * Core formatter: walks format and emits the result through
 * printchar()/prints()/printi().
 *
 *   out     destination forwarded to the helpers; when non-null, a
 *           terminating '\0' is stored at the final write position
 *   format  NUL-terminated format string
 *   args    argument list started by the caller
 *
 * A directive is '%', an optional '-', any number of '0', an optional
 * decimal width, then one of s d x X u c.  "%%" emits a literal '%'.
 * Any other conversion character is skipped without output and without
 * consuming an argument.  A directive cut short by the end of the
 * string ends formatting.
 *
 * Returns the number of characters emitted (the terminating '\0' is not
 * counted).  Note that va_end(args) is invoked here, on the list the
 * caller started.
 */
static int print(char** out, const char* format, va_list args)
{
    register int width, pad;
    register int pc = 0;
    char scr[2];

    for (; *format != 0; ++format)
    {
        if (*format == '%')
        {
            ++format;
            width = pad = 0;

            if (*format == '\0')
                break;

            if (*format == '%')
                goto out;

            if (*format == '-')
            {
                ++format;
                pad = PAD_RIGHT;
            }

            while (*format == '0')
            {
                ++format;
                pad |= PAD_ZERO;
            }

            for (; *format >= '0' && *format <= '9'; ++format)
            {
                width *= 10;
                width += *format - '0';
            }

            /*
             * The string ended inside the directive (e.g. "%-" or "%5").
             * Stop here: otherwise the loop's ++format would step past
             * the terminator and read beyond the string.
             */
            if (*format == '\0')
                break;

            if (*format == 's')
            {
                /* Fetch the argument as a pointer; reading it as int is a
                   type mismatch and loses bits where pointers are wider. */
                register const char *s = va_arg(args, char *);
                pc += prints(out, s ? s : "(null)", width, pad);
                continue;
            }

            if (*format == 'd')
            {
                pc += printi(out, va_arg(args, int), 10, 1, width, pad, 'a');
                continue;
            }

            if (*format == 'x')
            {
                pc += printi(out, va_arg(args, int), 16, 0, width, pad, 'a');
                continue;
            }

            if (*format == 'X')
            {
                pc += printi(out, va_arg(args, int), 16, 0, width, pad, 'A');
                continue;
            }

            if (*format == 'u')
            {
                pc += printi(out, va_arg(args, int), 10, 0, width, pad, 'a');
                continue;
            }

            if (*format == 'c')
            {
                /* char are converted to int then pushed on the stack */
                scr[0] = (char) va_arg(args, int);
                scr[1] = '\0';
                pc += prints(out, scr, width, pad);
                continue;
            }
        }
        else
        {
            out:
            printchar (out, *format);
            ++pc;
        }
    }

    if (out)
        **out = '\0';

    va_end(args);

    return pc;
}

/*
 * Formatted output entry point.
 *
 * Starts the variable argument list and hands it to print() with a null
 * destination.  Returns whatever print() returns.  va_end is not called
 * here; print() calls it on the list it is given.
 */
int printf(const char *format, ...)
{
    va_list ap;
    int written;

    va_start(ap, format);
    written = print(0, format, ap);

    return written;
}

If there's one thing I hate about reading library source code, it's that it's hardly ever readable. Variable names with one character and no comment to explain them are a pain.

Can you please explain, in a simple way, what exactly the method is doing to convert an integer into a string of decimal digits?

回答1:

The code you've pasted is not difficult to read. I suspect you may have given up early.

Ignoring the potential for a negative number for a moment, this printi() routine:

creates a buffer to print the number into, 12 characters wide
sets a character pointer s to point to the end of that buffer, NUL-terminates it, then moves the pointer one character to the "left"

Then the routine enters a loop, for as long as the number remains > 0

MOD by 10 (that is, divide by 10 and take the remainder)
- this becomes the digit that s is pointing to, so the ASCII representation is put there
- s is moved to the left again
set the number to itself / 10; this removes the digit that was just printed
repeat the loop as long as there are more digits to print

The only tricky thing here is the dealing with negative numbers, but if you understand how negative numbers are stored, it's not really tricky at all.

回答2:

Maybe I've been staring at template library headers too long, but that library code looks pretty readable to me!

I will explain the main loop, since the rest (juggling the sign around etc.) should be fairly easy to figure out.

while (u)
{
    t = u % b;

    if (t >= 10)
        t += letbase - '0' - 10;

    *--s = t + '0';
    u /= b;
}

Basically what we are doing is extracting digits one at a time, from right to left. Suppose b == 10 (i.e. the usual case of %d or %u). The % operator, called the modulo operator, calculates the remainder that is left after integer division. The first time the line t = u % b; runs, it calculates the rightmost digit of the output string -- what is left as a remainder after you divide the number u by 10. (Suppose the number u was 493: the remainder after dividing this by 10 is 3, the rightmost digit.)

After extracting this rightmost digit into t, the if statement decides what to "call" this digit if it is 10 or larger. This fixup amounts to adjusting t so that, when '0' (the ASCII value of the digit '0', which is 48) is added in the next line the result will be a letter starting at 'a' or 'A' (to produce hex digits and other digits for bases larger than 10).

The line after that writes the digit into the buffer. It goes into the rightmost character of the print_buf buffer (notice how s is earlier initialised to point to the end of this buffer, not the start as is usually the case). The pointer s is subsequently moved one character to the left in preparation for the next character.

The following line, u /= b, simply divides u by b (10 in this example), effectively discarding the rightmost digit. (This works because integer division never produces fractions, and always rounds down.) This then opens up the second-rightmost digit for the next loop iteration to process. Rinse, repeat. The loop finally stops when there is nothing left (the condition while (u) is equivalent to the condition while (u != 0)).

回答3:

The method to convert a positive integer i to base 10 is basically:

if (i == 0)
    printf("0");
else while (i != 0) {
    unsigned int j = i / 10;
    unsigned int digit = i - 10 * j;
    printf("%c", digit + '0');
    i = j;
}

Except that this prints out the number backward.

来源：https://stackoverflow.com/questions/13878189/understanding-printf-with-integers

标签

integer

printf

integer-arithmetic