Fastest way to calculate a 128-bit integer modulo a 64-bit integer

后端 未结 13 1866
谎友^
谎友^ 2020-12-01 00:15

I have a 128-bit unsigned integer A and a 64-bit unsigned integer B. What's the fastest way to calculate A % B - that is, the (64-bit) remainder from dividing A by B?

13条回答
  •  余生分开走
    2020-12-01 00:27

    I have written two versions of a Mod128by64 'Russian peasant' division function: a classic one and a speed-optimised one. On my 3 GHz PC the speed-optimised version performs more than 1,000,000 random calculations per second and is more than three times faster than the classic function. Compared with the execution time of a 64-by-64-bit modulo, this 128-by-64-bit function is only about 50% slower.

    Classic Russian peasant:

    function Mod128by64Clasic(Dividend: PUInt128; Divisor: PUInt64): UInt64;
    //Computes Dividend^ mod Divisor^ with a bit-by-bit shift-and-subtract
    //('Russian peasant') division over all 128 dividend bits.
    //In : eax = @Dividend (128 bit, least significant dword at [eax])
    //   : edx = @Divisor  (64 bit,  least significant dword at [edx])
    //Out: edx:eax = Remainder (edx = high dword)
    //     Returns 0 when the divisor is 0 (no exception is raised).
    asm
    //Registers inside routine
    //edx:ebp = Divisor (edx = high dword)
    //ecx     = Loop counter
    //esi:edi = Running remainder / result (esi = high dword)
    //bh      = 65th bit of the shifted remainder
      push    ebx                     //Store registers to stack
      push    esi
      push    edi
      push    ebp
      mov     ebp, [edx]              //Load divisor to edx:ebp
      mov     edx, [edx + 4]
      mov     ecx, ebp                //Div by 0 test
      or      ecx, edx
      jz      @DivByZero              //Nothing but the 4 registers is on the stack here
      push    dword ptr [eax]         //Copy the dividend to the stack so it can be shifted in place
      push    dword ptr [eax + 4]     //Layout after the pushes: [esp] = highest dword,
      push    dword ptr [eax + 8]     //[esp + 12] = lowest dword
      push    dword ptr [eax + 12]
      xor     edi, edi                //Clear result
      xor     esi, esi
      mov     ecx, 128                //Load shift counter (one iteration per dividend bit)
    @Do128BitsShift:
      shl     dword ptr [esp + 12], 1 //Shift dividend copy left for one bit...
      rcl     dword ptr [esp + 8], 1
      rcl     dword ptr [esp + 4], 1
      rcl     dword ptr [esp], 1
      rcl     edi, 1                  //...and move its top bit into the remainder
      rcl     esi, 1
      setc    bh                      //Save 65th bit shifted out of the remainder
      sub     edi, ebp                //Try to subtract the divisor
      sbb     esi, edx
      sbb     bh, 0                   //Borrow is real only if the 65th bit cannot absorb it
      jnc     @NoCarryAtCmp           //No borrow: remainder was >= divisor, keep the difference
      add     edi, ebp                //Borrow: undo the subtraction
      adc     esi, edx
    @NoCarryAtCmp:
      loop    @Do128BitsShift
    //End of 128 bit division loop
      mov     eax, edi                //Load result to edx:eax
      mov     edx, esi
      lea     esp, [esp + 16]         //Release the dividend copy; done BEFORE the shared exit
                                      //because the DivByZero path never allocated it
    @RestoreRegisters:
      pop     ebp                     //Restore Registers
      pop     edi
      pop     esi
      pop     ebx
      ret
    @DivByZero:
      xor     eax, eax                //Here you can raise Div by 0 exception, now function only return 0.
      xor     edx, edx
      jmp     @RestoreRegisters
    end;
    

    Speed optimised Russian peasant:

    function Mod128by64Oprimized(Dividend: PUInt128; Divisor: PUInt64): UInt64;
    //Computes Dividend^ mod Divisor^ with shift-and-subtract division that
    //consumes the dividend one byte at a time (8 unrolled bit steps per byte).
    //Two shortcuts reduce the number of bytes processed:
    //  - leading zero bytes of the dividend are skipped;
    //  - when the divisor's top byte is non-zero, the top 7 dividend bytes are
    //    smaller than the divisor and are loaded directly as the remainder.
    //In : eax = @Dividend (128 bit, least significant byte at [eax])
    //   : edx = @Divisor  (64 bit,  least significant dword at [edx])
    //Out: edx:eax = Remainder (edx = high dword)
    //     Returns 0 when the divisor is 0 (no exception is raised).
    asm
    //Registers inside routine
    //edx:ebp = Divisor (edx = high dword)
    //bl      = Dividend byte currently being shifted into the remainder
    //esi:edi = Running remainder / result (esi = high dword)
    //ecx     = Byte loop counter and Dividend byte index
      push    ebx                     //Store registers to stack
      push    esi
      push    edi
      push    ebp
      mov     ebp, [edx]              //Divisor = edx:ebp
      mov     edx, [edx + 4]
      mov     ecx, ebp                //Div by 0 test
      or      ecx, edx
      jz      @DivByZero
      xor     edi, edi                //Clear result
      xor     esi, esi
    //Start of 64 bit division Loop
      mov     ecx, 15                 //Start at the most significant dividend byte
    @SkipShift8Bits:                  //Small Dividend numbers shift optimisation
      cmp     [eax + ecx], ch         //Zero test (ch = 0 because ecx <= 15)
      jnz     @EndSkipShiftDividend
      loop    @SkipShift8Bits         //Byte is zero: try the next lower one, stop at index 0
    @EndSkipShiftDividend:
      test    edx, $FF000000          //Huge Divisor Numbers Shift Optimisation
      jz      @Shift8Bits             //Divisor <= $00FFFFFF:FFFFFFFF: no shortcut possible
      cmp     ecx, 9                  //Bytes 9..15 all zero? Then the remainder is still 0 and
      jb      @Shift8Bits             //the start index found above already skips more work
      mov     ecx, 8                  //Continue with dividend byte 8 after the preload
      mov     esi, [eax + 12]         //Do fast 56 bit (7 bytes) shift...
      shr     esi, cl                 //esi = $00XXXXXX (dividend bytes 13..15)
      mov     edi, [eax + 9]          //edi = dividend bytes 9..12
    @Shift8Bits:
      mov     bl, [eax + ecx]         //Load 8 bit part of Dividend
    //Compute 8 Bits unrolled loop
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove0         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow0
      ja      @DividentAbove0
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow0
    @DividentAbove0:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow0:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove1         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow1
      ja      @DividentAbove1
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow1
    @DividentAbove1:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow1:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove2         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow2
      ja      @DividentAbove2
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow2
    @DividentAbove2:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow2:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove3         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow3
      ja      @DividentAbove3
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow3
    @DividentAbove3:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow3:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove4         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow4
      ja      @DividentAbove4
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow4
    @DividentAbove4:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow4:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove5         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow5
      ja      @DividentAbove5
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow5
    @DividentAbove5:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow5:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove6         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow6
      ja      @DividentAbove6
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow6
    @DividentAbove6:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow6:
      shl     bl, 1                   //Shift next dividend bit into the remainder
      rcl     edi, 1
      rcl     esi, 1
      jc      @DividentAbove7         //65th bit shifted out: remainder >= divisor for sure
      cmp     esi, edx                //remainder hi part larger?
      jb      @DividentBelow7
      ja      @DividentAbove7
      cmp     edi, ebp                //remainder lo part larger?
      jb      @DividentBelow7
    @DividentAbove7:
      sub     edi, ebp                //Subtract the divisor
      sbb     esi, edx
    @DividentBelow7:
    //End of Compute 8 Bits (unrolled loop)
      dec     cl                      //Move to the next lower dividend byte
      jns     @Shift8Bits             //Leave once the index drops below 0 (byte 0 is done)
    //End of division loop
      mov     eax, edi                //Load result to edx:eax
      mov     edx, esi
    @RestoreRegisters:
      pop     ebp                     //Restore Registers
      pop     edi
      pop     esi
      pop     ebx
      ret
    @DivByZero:
      xor     eax, eax                //Here you can raise Div by 0 exception, now function only return 0.
      xor     edx, edx
      jmp     @RestoreRegisters
    end;
    

提交回复
热议问题