Fastest way to calculate a 128-bit integer modulo a 64-bit integer

后端 未结 13 1834
谎友^
谎友^ 2020-12-01 00:15

I have a 128-bit unsigned integer A and a 64-bit unsigned integer B. What's the fastest way to calculate A % B - that is, the (64-bit) remainder from dividing A by B?

13条回答
  •  日久生厌
    2020-12-01 00:43

    This is a largely untested, partially speed-optimised Mod128by64 function based on the 'Russian peasant' algorithm. Unfortunately I'm a Delphi user, so this function works under Delphi. :) But the assembler is almost the same, so...

    function Mod128by64(Dividend: PUInt128; Divisor: PUInt64): UInt64;
    //Returns Dividend^ mod Divisor^ using bit-by-bit shift-and-subtract
    //(restoring) division. Returns 0 when Divisor^ is 0.
    //In : eax = @Dividend (16 bytes; byte 15 is treated as the most significant)
    //   : edx = @Divisor  (low dword at offset 0, high dword at offset 4)
    //Out: edx:eax = Remainder (edx = high dword, eax = low dword)
    asm
    //Registers inside routine
    //Divisor           = edx:ebp (high:low)
    //Running remainder = bh:esi:edi (64 bits in esi:edi + 65th bit in bh)
    //bl                = Dividend byte currently being shifted in
    //cl                = Dividend byte index and outer loop counter
    //ch                = bit counter of the inner loop (0 outside of it)
      push    ebx                     //Save the registers used as working storage
      push    esi
      push    edi
      push    ebp
      mov     ebp, [edx]              //ebp = low dword of Divisor
      mov     edx, [edx + 4]          //edx = high dword of Divisor (pointer no longer needed)
      mov     ecx, ebp                //Div by 0 test
      or      ecx, edx                //ecx = 0 only if both Divisor halves are 0
      jz      @DivByZero
      xor     edi, edi                //Remainder = 0
      xor     esi, esi
    //Start of 64 bit division Loop
      mov     ecx, 15                 //Start at the most significant Dividend byte
    @SkipShift8Bits:                  //Small Dividend optimisation: skip leading zero bytes
      cmp     [eax + ecx], ch         //ch = 0 here (ecx <= 15), so this tests Dividend byte ecx for zero
      jnz     @EndSkipShiftDividend   //First non-zero byte found, ecx is its index
      loop    @SkipShift8Bits         //Decrement ecx, repeat while ecx <> 0 (byte 0 is never skipped)
    @EndSkipShiftDividend:
      test    edx, $FF000000          //Huge Divisor optimisation: is the top Divisor byte non-zero?
      jz      @Shift8Bits             //No: keep the byte index found by the skip loop
    //Here Divisor > $00FFFFFF:FFFFFFFF. The top 7 Dividend bytes form a value
    //below 2^56, i.e. below the Divisor, so they are loaded straight into the
    //remainder and only bytes 8..0 go through the bit loop.
      mov     ecx, 8                  //cl = 8: shift count below and index of the next byte (replaces skip-loop index)
      mov     esi, [eax + 12]         //esi = Dividend bytes 12..15
      shr     esi, cl                 //Drop byte 12: esi = $00XXXXXX (bytes 13..15)
      mov     edi, [eax + 9]          //edi = Dividend bytes 9..12 (dword load at byte offset 9)
    @Shift8Bits:
      mov     bl, [eax + ecx]         //Load next Dividend byte; ch = 0 here, so ecx = cl = byte index
    //The 8-iteration bit loop below could be unrolled to increase execution speed...
      mov     ch, 8                   //ch = bits left to process in this byte
    @Do65BitsShift:
      shl     bl, 1                   //CF = next Dividend bit (most significant first)
      rcl     edi, 1                  //Remainder = Remainder * 2 + bit
      rcl     esi, 1
      setc    bh                      //bh = bit shifted out of esi (65th bit)
      sub     edi, ebp                //Trial subtraction: Remainder - Divisor
      sbb     esi, edx
      sbb     bh, 0                   //Borrow from the 65th bit; CF = 1 only if Remainder < Divisor
      jnc     @NoCarryAtCmp           //No borrow: keep the reduced value
      add     edi, ebp                //Borrow: add Divisor back to restore the previous Remainder
      adc     esi, edx
    @NoCarryAtCmp:
      dec     ch                      //One bit done
      jnz     @Do65BitsShift
    //End of 8 bit (byte) partial division loop
      dec     cl                      //Move to the next lower Dividend byte
      jns     @Shift8Bits             //Byte 0 is still processed; after it cl wraps to $FF and the loop ends
    //End of 64 bit division loop
      mov     eax, edi                //Return Remainder in edx:eax (eax = low, edx = high)
      mov     edx, esi
    @RestoreRegisters:
      pop     ebp                     //Restore registers in reverse push order
      pop     edi
      pop     esi
      pop     ebx
      ret
    @DivByZero:
      xor     eax, eax                //A Div by 0 exception could be raised here; for now the function returns 0.
      xor     edx, edx
      jmp     @RestoreRegisters
    end;
    

    At least one more speed optimisation is possible! After the 'Huge Divisor Numbers Shift Optimisation' we can test the divisor's high bit; if it is 0, we do not need the extra bh register to store the 65th bit. So the unrolled part of the loop can look like:

      shl     bl,1                    //CF = next dividend bit (most significant first)
      rcl     edi,1                   //Remainder = remainder * 2 + bit
      rcl     esi,1                   //No setc bh: this variant keeps no 65th bit
      sub     edi, ebp                //Trial subtraction: remainder - divisor
      sbb     esi, edx                //CF = 1 if remainder < divisor
      jnc     @NoCarryAtCmpX          //No borrow: keep the reduced value
      add     edi, ebp                //Borrow: add divisor back to restore the previous remainder
      adc     esi, edx
    @NoCarryAtCmpX:
    

提交回复
热议问题