128-bit shifts using assembly language?

后端 未结 2 633
南笙
南笙 2021-01-05 01:08

What is the most efficient way to do a 128-bit shift on a modern Intel CPU (Core i7, Sandy Bridge)?

Similar code is in my innermost loop:

u128 a[N];         


        
2条回答
  •  梦谈多话
    2021-01-05 01:36

    In this particular case you could use a combination of x86 SHR and RCR instructions:

    ; 128-bit logical right shift on 32-bit x86 (Intel syntax).
    ; Produces b[i] = a[i] >> 1 and c[i] = a[i] >> 2.
    ; a0..a3, b0..b3 and c0..c3 are placeholders for the four 32-bit words
    ; of a[i], b[i] and c[i]; word 0 is the least significant.
    ; Clobbers eax, ebx, ecx, edx and the flags.
    ; NOTE: ebx is callee-saved in the usual calling conventions; save and
    ; restore it if this sequence is placed inside a function.
    ;
    ; a0 - bits 0-31 of a[i]
    ; a1 - bits 32-63 of a[i]
    ; a2 - bits 64-95 of a[i]
    ; a3 - bits 96-127 of a[i]
    mov eax, a0
    mov ebx, a1
    mov ecx, a2
    mov edx, a3             ; most significant word goes in edx
    
    ; A right shift has to start at the MOST significant word: SHR moves
    ; its bit 0 into CF, and each following RCR rotates CF into bit 31 of
    ; the next lower word while passing that word's bit 0 on through CF.
    shr edx, 1              ; bits 96-127, top bit filled with 0
    rcr ecx, 1              ; bits 64-95
    rcr ebx, 1              ; bits 32-63
    rcr eax, 1              ; bits 0-31
    
    ; b0 - bits 0-31 of b[i] := a[i] >> 1
    ; b1 - bits 32-63 of b[i] := a[i] >> 1
    ; b2 - bits 64-95 of b[i] := a[i] >> 1
    ; b3 - bits 96-127 of b[i] := a[i] >> 1
    mov b0, eax
    mov b1, ebx
    mov b2, ecx
    mov b3, edx
    
    ; The registers still hold a[i] >> 1, so one more 1-bit shift
    ; (again high word first) yields a[i] >> 2.
    shr edx, 1
    rcr ecx, 1
    rcr ebx, 1
    rcr eax, 1
    
    ; c0 - bits 0-31 of c[i] := a[i] >> 2 = b[i] >> 1
    ; c1 - bits 32-63 of c[i] := a[i] >> 2 = b[i] >> 1
    ; c2 - bits 64-95 of c[i] := a[i] >> 2 = b[i] >> 1
    ; c3 - bits 96-127 of c[i] := a[i] >> 2 = b[i] >> 1
    mov c0, eax
    mov c1, ebx
    mov c2, ecx
    mov c3, edx
    

    If your target is x86-64 this simplifies to:

    ; 128-bit logical right shift on x86-64 (Intel syntax).
    ; Produces b[i] = a[i] >> 1 and c[i] = a[i] >> 2.
    ; a0/a1, b0/b1 and c0/c1 are placeholders for the low and high 64-bit
    ; halves of a[i], b[i] and c[i].
    ; Clobbers rax, rbx and the flags.
    ; NOTE: rbx is callee-saved in the System V and Microsoft x64 ABIs;
    ; save and restore it if this sequence is placed inside a function.
    ;
    ; a0 - bits 0-63 of a[i]
    ; a1 - bits 64-127 of a[i]
    mov rax, a0
    mov rbx, a1
    
    ; Shift the HIGH half first: SHR moves bit 64 into CF and RCR then
    ; rotates CF into bit 63 of the low half.
    ; An equivalent pair without the carry-flag dependency is
    ;     shrd rax, rbx, 1
    ;     shr  rbx, 1
    ; which may be cheaper on some microarchitectures - measure.
    shr rbx, 1              ; bits 64-127, top bit filled with 0
    rcr rax, 1              ; bits 0-63
    
    ; b0 - bits 0-63 of b[i] := a[i] >> 1
    ; b1 - bits 64-127 of b[i] := a[i] >> 1
    mov b0, rax
    mov b1, rbx
    
    ; The registers still hold a[i] >> 1; shift once more for a[i] >> 2.
    shr rbx, 1
    rcr rax, 1
    
    ; c0 - bits 0-63 of c[i] := a[i] >> 2 = b[i] >> 1
    ; c1 - bits 64-127 of c[i] := a[i] >> 2 = b[i] >> 1
    mov c0, rax
    mov c1, rbx
    

    Update: corrected typos in 64-bit version

提交回复
热议问题