C Calling Conventions 32bit to NASM with float (movups/movupd difference)

放肆的年华 提交于 2019-12-12 04:59:29

问题


I have this function in C. When I use instructions like movss, movaps, or movups, everything works properly; but when I use instructions like movupd, movapd, etc., it does not work and returns strange values.

CODE THAT WORKS PROPERLY WITH movaps, movups, etc.

C:

#include <stdio.h>   /* printf */

/*
 * Implemented in NASM (32-bit cdecl). Receives two float arrays, the number
 * of full 4-float groups (num), the number of leftover floats (spuri), and a
 * pointer through which the result is written.
 */
extern float test(float* a,float* b, int num, int spuri, float* res);


/*
 * Driver: splits a 6-element problem into full groups of four floats plus a
 * remainder, calls the assembly routine, and prints the value it stored in
 * `res`.
 */
int main(int argc, char** argv) {
    (void)argc;   /* command-line arguments are not used */
    (void)argv;

    float a[] = { 1.0f, 2.0f, 3.0f, 4.0f, 6.0f, 9.0f };
    float b[] = { 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 8.0f };
    int d = 6;                    /* total number of elements            */
    int num = d / 4;              /* full groups of 4 floats             */
    int spuri = d - (num * 4);    /* leftover elements after the groups  */
    float res = -1.0f;            /* sentinel, overwritten by test()     */

    test(a, b, num, spuri, &res);

    printf("res: %f\n", res);

    return 0;   /* success; a non-zero status would signal failure to the shell */
}

NASM:

; Project-local include; its contents are not visible here.
; NOTE(review): presumably SSE helper macros - confirm against sseutils.nasm.
%include "sseutils.nasm"

section .data           ; initialized data (nothing is defined here)


section .bss            ; uninitialized storage

alignb 16               ; pad so the next reservation starts on a 16-byte boundary
A:  resd    1           ; one dword; not referenced by the code visible here
T:  resd    4           ; four dwords; not referenced by the code visible here
                        ; NOTE(review): T follows the 4-byte A, so T itself
                        ; starts 4 bytes past the 16-byte boundary.


section .text           ; code

global test             ; export `test` so the linker can resolve it from C

; Offsets of the stack arguments relative to ebp once `test` has executed
; `push ebp` / `mov ebp, esp`: [ebp] = saved ebp, [ebp+4] = return address,
; so the first 4-byte argument sits at [ebp+8].
a           equ     8   ; float *a
b           equ     12  ; float *b
num         equ         16      ; int num   (loaded into ecx as the first loop's counter)
spuri       equ         20      ; int spuri (loaded into edx as the second loop's counter)
result      equ     24          ; float *res (address the result is stored to)

;-----------------------------------------------------------------------
; float test(float *a, float *b, int num, int spuri, float *res)
;
; Syntax: NASM (Intel).  ABI: 32-bit cdecl, arguments read via ebp.
;
; Computes  sum_i sqrt((a[i] - b[i])^2)  (i.e. the sum of |a[i] - b[i]|)
; over num*4 + spuri single-precision elements.
;
; In:    [ebp+a], [ebp+b]  input float arrays (may be unaligned)
;        [ebp+num]         number of full 4-float groups
;        [ebp+spuri]       number of leftover floats after the groups
;        [ebp+result]      address of ONE float that receives the sum
; Out:   *res = sum;  st0 = sum (cdecl float return value)
; Clobb: eax, ecx, edx, xmm0-xmm3, xmm6, xmm7, flags
; Saved: ebx, esi, edi, ebp (callee-saved)
; Needs: SSE3 (haddps)
;-----------------------------------------------------------------------
test:
        push    ebp
        mov     ebp, esp
        push    ebx
        push    esi
        push    edi

        mov     esi, [ebp+a]            ; esi = a
        mov     edi, [ebp+b]            ; edi = b
        xor     ebx, ebx                ; ebx = byte offset into both arrays
        mov     ecx, [ebp+num]          ; ecx = 4-float groups remaining
        mov     edx, [ebp+spuri]        ; edx = leftover floats remaining
        mov     eax, [ebp+result]       ; eax = res
        xorps   xmm1, xmm1              ; xmm1 = four partial sums (vector part)
        xorps   xmm3, xmm3              ; xmm3 = scalar sum (tail part)

        ; Skip the vector loop when there is no full group. A signed check
        ; also keeps a negative count from spinning ~2^32 iterations.
        test    ecx, ecx
        jle     .tail

.vec_loop:                              ; invariant: ecx > 0
        movups  xmm0, [esi+ebx]         ; a[i..i+3]
        movups  xmm6, [edi+ebx]         ; b[i..i+3]
        subps   xmm0, xmm6              ; a - b
        mulps   xmm0, xmm0              ; (a - b)^2
        sqrtps  xmm0, xmm0              ; sqrt((a - b)^2)
        addps   xmm1, xmm0              ; accumulate per lane
        add     ebx, 16                 ; advance 4 floats
        dec     ecx
        jnz     .vec_loop

.tail:
        test    edx, edx
        jle     .done

.tail_loop:                             ; invariant: edx > 0
        movss   xmm2, [esi+ebx]         ; a[i]  (upper lanes zeroed by the load)
        movss   xmm7, [edi+ebx]         ; b[i]
        subss   xmm2, xmm7
        mulss   xmm2, xmm2
        sqrtss  xmm2, xmm2
        addss   xmm3, xmm2              ; accumulate in lane 0 only
        add     ebx, 4                  ; advance 1 float
        dec     edx
        jnz     .tail_loop

.done:
        haddps  xmm1, xmm1              ; two horizontal adds leave the sum of
        haddps  xmm1, xmm1              ; all four lanes in every lane
        addss   xmm1, xmm3              ; lane 0 += tail sum

        ; res points to a single float: store exactly 4 bytes. A 16-byte
        ; movups here would overwrite 12 bytes beyond the caller's variable.
        movss   [eax], xmm1

        ; The C prototype returns float, which cdecl passes back in st0.
        fld     dword [eax]

        pop     edi
        pop     esi
        pop     ebx
        mov     esp, ebp
        pop     ebp
        ret

This returns the correct value, but I need more precision because the numbers are floating point, so I need to use movupd or similar instructions.

How must the previous code be modified to use instructions such as MOVUPD, MOVAPD, or similar?

来源:https://stackoverflow.com/questions/37257665/c-calling-conventions-32bit-to-nasm-with-float-movups-movupd-difference

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!