Faster 16bit multiplication algorithm for 8-bit MCU

前端 未结 6 1566
没有蜡笔的小新
没有蜡笔的小新 2021-02-12 11:27

I'm searching for an algorithm to multiply two integer numbers that is better than the one below. Do you have a good idea about that? (The MCU - ATtiny 84/85 or similar - wher

6条回答
  •  南笙
    南笙 (楼主)
    2021-02-12 12:08

    At long last, an answer, if a cheeky one: I couldn't (yet) get the AVR C compiler from GCC to fit it into 8K of code. (For an assembler rendition, see AVR multiplication: No Holds Barred).
    The approach is what everyone who used Duff's device tried for a second attempt:
    use a switch. Using macros, the source code looks entirely harmless, if massaged:

    /*
     * Build-time options; both default to off when the build does not set them.
     *   preShift      - shift the partial product once, before the second
     *                   switch, instead of inside every case of it.
     *   preliminaries - handle b0 == 0 and trailing zero bits of b0 before
     *                   entering the second switch.
     */
    #ifndef preShift
    # define preShift 0
    #endif
    #ifndef preliminaries
    # define preliminaries 0
    #endif

    /*
     * Case generators for the switch on the HIGH byte of b.
     * Each case stores the low byte of a0 * mp in p0. The multiplication is
     * done in unsigned arithmetic so it cannot overflow a 16-bit signed int.
     */
    #define low(mp)     case mp: p0 = (uint8_t)((uint16_t)a0 * (uint8_t)(mp)); break
    #define low4(mp)    low(mp); low(mp + 1); low(mp + 2); low(mp + 3)
    #define low16(mp)   low4(mp); low4(mp + 4); low4(mp + 8); low4(mp + 12)
    #define low64(mp)   low16(mp); low16(mp + 16); low16(mp + 32); low16(mp + 48)

    /*
     * Case generators for the switch on the LOW byte of b.
     * Each case returns a * mp plus the partial product shifted into the
     * high byte, truncated to 16 bits.
     */
    #if preShift
    # define CASE(mp)   case mp: return (uint16_t)(p + a * (mp))
    #else
    # define CASE(mp)   case mp: return (uint16_t)(((uint16_t)p0 << 8) + a * (mp))
    #endif
    #define case4(mp)   CASE(mp); CASE(mp + 1); CASE(mp + 2); CASE(mp + 3)
    #define case16(mp)  case4(mp); case4(mp + 4); case4(mp + 8); case4(mp + 12)
    #define case64(mp)  case16(mp); case16(mp + 16); case16(mp + 32); case16(mp + 48)

    /*
     * Multiply two 16-bit values and return the low 16 bits of the product.
     *
     * With b = (b1 << 8) | b0 and a0 the low byte of a:
     *     a * b  mod 2^16  =  a * b0  +  ((a0 * b1 mod 2^8) << 8)   (mod 2^16)
     * Both multiplications are by a constant inside a switch case, one case
     * per possible byte value.
     *
     * a, b:    the factors.
     * returns: (a * b) truncated to 16 bits.
     */
    #ifdef __cplusplus
    extern "C"
    #endif
    __attribute__ ((noinline))
    uint16_t mpy16NHB16(uint16_t a, uint16_t b)
    {
        uint8_t b0 = (uint8_t)b, b1 = (uint8_t)(b >> 8);
        uint8_t a0 = (uint8_t)a;
        uint8_t p0 = 0;     /* low byte of a0 * b1; set by the first switch */
    #if preShift
        uint16_t p;
    #endif

        /* High byte of b: only the low byte of a0 * b1 survives the << 8. */
        switch (b1) {
            low64(0);
            low64(64);
            low64(128);
            low64(192);
        }
    #if preShift
        p = (uint16_t)((uint16_t)p0 << 8);
    #endif
    #if preliminaries
        /* b0 == 0 contributes nothing, so the partial product is the result. */
        if (0 == b0)
            return (uint16_t)((uint16_t)p0 << 8);
        /* Move trailing zero bits of b0 into a: (a << 1) * (b0 >> 1) equals
           a * b0 modulo 2^16 while b0 is even. b0 is non-zero here, so the
           loop terminates. */
        while (!(b0 & 1)) {
            a <<= 1;
            b0 >>= 1;
        }
    #endif
        /* Low byte of b: every case returns the finished product. */
        switch (b0) {
            case64(0);
            case64(64);
            case64(128);
            case64(192);
        }
        /* Not reached: the switch above has a case for every uint8_t value. */
        return (uint16_t)~0u;
    }
    /*
     * Exhaustive driver: prints mpy16NHB16(a, m) in decimal, one value per
     * line, for every pair with 0 <= m <= a < 0xFFFF.
     *
     * ac, av: unused.
     * returns: 0.
     */
    int main(int ac, char const *const av[])
    {
        /* Exclusive upper bound for a. The former condition `a < a+1` only
           terminates where int is 16 bits wide (it becomes false once a
           reaches 0xFFFF); with a wider int it is always true. Spelling the
           bound out keeps the 16-bit behaviour and terminates everywhere. */
        const uint16_t a_end = 0xFFFFu;
        char buf[22];

        (void)ac;
        (void)av;

        for (uint16_t a = 0; a < a_end; a++) {
            for (uint16_t m = 0; m <= a; m++) {
                /* NOTE(review): itoa() is non-standard and takes an int; where
                   int is 16 bits, products above 32767 would presumably print
                   as negative - consider utoa(); confirm against the target
                   libc. */
                puts(itoa(mpy16NHB16(a, m), buf, 10));
            }
        }
        return 0;
    }
    

提交回复
热议问题