Position of least significant bit that is set

后端 未结 23 1275
时光取名叫无心
时光取名叫无心 2020-11-22 08:46

I am looking for an efficient way to determine the position of the least significant bit that is set in an integer, e.g. for 0x0FF0 it would be 4.

A trivial impleme

23条回答
  •  耶瑟儿~
    2020-11-22 08:58

    Weee, loads of solutions and not a benchmark in sight. You people should be ashamed of yourselves ;-)

    My machine is an Intel i530 (2.9 GHz), running Windows 7 64-bit. I compiled with a 32-bit version of MinGW.

    $ gcc --version
    gcc.exe (GCC) 4.7.2
    
    $ gcc bench.c -o bench.exe -std=c99 -Wall -O2
    $ bench
    Naive loop.         Time = 2.91  (Original questioner)
    De Bruijn multiply. Time = 1.16  (Tykhyy)
    Lookup table.       Time = 0.36  (Andrew Grant)
    FFS instruction.    Time = 0.90  (ephemient)
    Branch free mask.   Time = 3.48  (Dan / Jim Balter)
    Double hack.        Time = 3.41  (DocMax)
    
    $ gcc bench.c -o bench.exe -std=c99 -Wall -O2 -march=native
    $ bench
    Naive loop.         Time = 2.92
    De Bruijn multiply. Time = 0.47
    Lookup table.       Time = 0.35
    FFS instruction.    Time = 0.68
    Branch free mask.   Time = 3.49
    Double hack.        Time = 0.92
    

    My code:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    
    
    // Number of unsigned values in the benchmark input array
    #define ARRAY_SIZE 65536
    #define NUM_ITERS 5000  // Number of times to process array
    
    
    /*
     * Benchmark: locate the lowest set bit by shifting right one bit at a time.
     *
     * Walks the ARRAY_SIZE-element input NUM_ITERS times. Every non-zero
     * element adds its 1-based lowest-set-bit position to the running sum;
     * zero elements are skipped and add nothing.
     *
     * Returns the accumulated sum.
     */
    int find_first_bits_naive_loop(unsigned nums[ARRAY_SIZE])
    {
        int total = 0; // Accumulated and returned so the loops cannot be optimized away
        for (int pass = 0; pass < NUM_ITERS; pass++) {
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                unsigned remaining = nums[idx];
                if (remaining != 0) {
                    // Count how many right shifts bring a set bit into bit 0.
                    unsigned shifts = 0;
                    for (; (remaining & 1u) == 0; remaining >>= 1) {
                        shifts++;
                    }
                    total += shifts + 1;
                }
            }
        }

        return total;
    }
    
    
    /*
     * Benchmark: locate the lowest set bit with a multiply-and-lookup.
     *
     * The lowest set bit is isolated with (c & -c), multiplied by the constant
     * 0x077CB531, and the top five bits of the product index a 32-entry table
     * holding 1-based bit positions.
     *
     * No zero check is made: a zero element selects table index 0 and therefore
     * contributes 1 to the sum.
     *
     * Returns the sum of the looked-up positions over NUM_ITERS passes.
     */
    int find_first_bits_de_bruijn(unsigned nums[ARRAY_SIZE])
    {
        static const int position_for_hash[32] =
        {
            1,  2, 29,  3, 30, 15, 25,  4,
           31, 23, 21, 16, 26, 18,  5,  9,
           32, 28, 14, 24, 22, 20, 17,  8,
           27, 13, 19,  7, 12,  6, 11, 10
        };

        int total = 0; // Accumulated and returned so the loops cannot be optimized away
        for (int pass = 0; pass < NUM_ITERS; pass++) {
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                unsigned int c = nums[idx];
                unsigned int isolated = c & -c;  // keeps only the lowest set bit
                unsigned int slot = (unsigned)(isolated * 0x077CB531U) >> 27;
                total += position_for_hash[slot];
            }
        }

        return total;
    }
    
    
    // Per-byte lookup table: main() fills entry i with get_lowest_set_bit(i) before the benchmarks run.
    unsigned char lowestBitTable[256];
    /*
     * Return the 1-based position of the lowest set bit among the low 32 bits
     * of num, or 0 when none of those bits is set.
     */
    int get_lowest_set_bit(unsigned num) {
        int position = 0;
        while (position < 32) {
            if ((num >> position) & 1u) {
                return position + 1;
            }
            position++;
        }

        return 0;
    }
    /*
     * Benchmark: locate the lowest set bit with a 256-entry per-byte table.
     *
     * The low four bytes of each element are examined from least to most
     * significant; the first non-zero byte is looked up in lowestBitTable and
     * offset by 8 bits per byte skipped. lowestBitTable must already be
     * populated (main() does this) before the function is called.
     *
     * Bytes are extracted with shifts instead of through an unsigned char
     * pointer, so the result does not depend on the host's byte order.
     * A zero element contributes 0, matching get_lowest_set_bit(0).
     *
     * Returns the sum of the 1-based positions over NUM_ITERS passes.
     */
    int find_first_bits_lookup_table(unsigned nums[ARRAY_SIZE])
    {
        int total = 0; // Prevent compiler from optimizing out the code
        for (int j = 0; j < NUM_ITERS; j++) {
            for (int i = 0; i < ARRAY_SIZE; i++) {
                unsigned int value = nums[i];
                if (value & 0xffu)
                    total += lowestBitTable[value & 0xffu];
                else if ((value >> 8) & 0xffu)
                    total += lowestBitTable[(value >> 8) & 0xffu] + 8;
                else if ((value >> 16) & 0xffu)
                    total += lowestBitTable[(value >> 16) & 0xffu] + 16;
                else if ((value >> 24) & 0xffu)
                    total += lowestBitTable[(value >> 24) & 0xffu] + 24;
                // else: no bit set in the low 32 bits, nothing to add
            }
        }

        return total;
    }
    
    
    /*
     * Benchmark: locate the lowest set bit with the compiler's __builtin_ffs.
     *
     * Each element is passed to __builtin_ffs and the results are summed over
     * NUM_ITERS passes of the ARRAY_SIZE-element input.
     *
     * Returns the accumulated sum.
     */
    int find_first_bits_ffs_instruction(unsigned nums[ARRAY_SIZE])
    {
        int total = 0; // Accumulated and returned so the loops cannot be optimized away
        const unsigned *const end = nums + ARRAY_SIZE;

        for (int pass = 0; pass < NUM_ITERS; pass++) {
            for (const unsigned *cursor = nums; cursor != end; ++cursor) {
                total += __builtin_ffs(*cursor);
            }
        }

        return total;
    }
    
    
    /*
     * Benchmark: locate the lowest set bit with a binary search built from masks.
     *
     * At each step the low 16/8/4/2/1 bits are tested; when they are all zero
     * the value is shifted right by that width and the width is added to the
     * position. The final correction term is -32 when no set bit was found,
     * which makes a zero element contribute 0.
     *
     * Returns the sum of the 1-based positions over NUM_ITERS passes.
     */
    int find_first_bits_branch_free_mask(unsigned nums[ARRAY_SIZE])
    {
        int total = 0; // Accumulated and returned so the loops cannot be optimized away
        for (int pass = 0; pass < NUM_ITERS; pass++) {
            for (int idx = 0; idx < ARRAY_SIZE; idx++) {
                unsigned v = nums[idx];

                // Each skipN is N when the low N bits are all clear, otherwise 0.
                int skip16 = ((v & 0xffff) == 0) << 4;
                v >>= skip16;

                int skip8 = ((v & 0xff) == 0) << 3;
                v >>= skip8;

                int skip4 = ((v & 0xf) == 0) << 2;
                v >>= skip4;

                int skip2 = ((v & 0x3) == 0) << 1;
                v >>= skip2;

                int skip1 = ((v & 0x1) == 0);

                // If the bit reached after all skips is still clear, the input was zero.
                int zero_fixup = ((v >> skip1) & 1) ? 0 : -32;

                total += skip16 + skip8 + skip4 + skip2 + skip1 + zero_fixup + 1;
            }
        }

        return total;
    }
    
    
    /*
     * Benchmark: locate the lowest set bit via the exponent of a double.
     *
     * value ^ (value - 1) yields a mask covering the lowest set bit and every
     * bit below it. Converted to double, that mask has a biased exponent of
     * (0-based bit index + 1023), so subtracting 1022 gives the 1-based
     * position. The !!value term avoids the subtraction for a zero element;
     * such an element converts to 0.0 and contributes -1022.
     *
     * The exponent is read by copying the double's bytes into a uint64_t
     * rather than casting the pointer to int*, which violated the strict
     * aliasing rule and depended on little-endian word order. Assumes double
     * is a 64-bit IEEE-754 value, as the original code did.
     *
     * Returns the accumulated sum over NUM_ITERS passes.
     */
    int find_first_bits_double_hack(unsigned nums[ARRAY_SIZE])
    {
        int total = 0; // Prevent compiler from optimizing out the code
        for (int j = 0; j < NUM_ITERS; j++) {
            for (int i = 0; i < ARRAY_SIZE; i++) {
                unsigned value = nums[i];
                double d = value ^ (value - !!value);
                uint64_t bits;
                memcpy(&bits, &d, sizeof bits);
                // d is never negative, so bits 52..63 hold just the biased exponent.
                total += (int)(bits >> 52) - 1022;
            }
        }

        return total;
    }
    
    
    /*
     * Benchmark driver.
     *
     * Fills the input array with pseudo-random values, populates
     * lowestBitTable, then runs each find_first_bits_* implementation once,
     * printing the CPU time measured with clock() and the returned sum.
     */
    int main(void) {
        // static: keeps the ARRAY_SIZE-element input array out of the stack frame.
        static unsigned nums[ARRAY_SIZE];
        for (int i = 0; i < ARRAY_SIZE; i++) {
            // Combine two rand() results in unsigned arithmetic: shifting or adding
            // them as signed int overflows (undefined behavior) when RAND_MAX is
            // larger than 16 bits.
            nums[i] = (unsigned)rand() + ((unsigned)rand() << 15);
        }

        for (int i = 0; i < 256; i++) {
            lowestBitTable[i] = get_lowest_set_bit(i);
        }


        clock_t start_time, end_time;
        int result;

        start_time = clock();
        result = find_first_bits_naive_loop(nums);
        end_time = clock();
        printf("Naive loop.         Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        start_time = clock();
        result = find_first_bits_de_bruijn(nums);
        end_time = clock();
        printf("De Bruijn multiply. Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        start_time = clock();
        result = find_first_bits_lookup_table(nums);
        end_time = clock();
        printf("Lookup table.       Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        start_time = clock();
        result = find_first_bits_ffs_instruction(nums);
        end_time = clock();
        printf("FFS instruction.    Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        start_time = clock();
        result = find_first_bits_branch_free_mask(nums);
        end_time = clock();
        printf("Branch free mask.   Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        start_time = clock();
        result = find_first_bits_double_hack(nums);
        end_time = clock();
        printf("Double hack.        Time = %.2f, result = %d\n", 
            (end_time - start_time) / (double)(CLOCKS_PER_SEC), result);

        return 0;
    }
    

提交回复
热议问题