_MM_TRANSPOSE4_PS causes compiler errors in GCC?

问题

I'm compiling my math library in GCC instead of MSVC for the first time and going through all the little errors, and I've hit one that simply makes no sense:

Line 284: error: lvalue required as left operand of assignment

What's on line 284? This:

_MM_TRANSPOSE4_PS(r, u, t, _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));

(r, u, and t are all instances of __m128)

Those familiar with using xmmintrin.h will be aware that _MM_TRANSPOSE4_PS isn't actually a function, but rather a macro, which expands to:

/* Transpose the 4x4 matrix composed of row[0-3], in place.

   Each rowN argument is first read into a local copy (__r0..__r3) and
   is later used as the left operand of an assignment, so every
   argument must be a modifiable lvalue (for example a named __m128
   variable).  An argument that yields a temporary, such as a call to
   _mm_setr_ps(), makes the `(rowN) = ...' statements invalid.  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3)           \
do {                                    \
  __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3);    \
  __v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1);           \
  __v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3);           \
  __v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1);           \
  __v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3);           \
  (row0) = __builtin_ia32_movlhps (__t0, __t1);             \
  (row1) = __builtin_ia32_movhlps (__t1, __t0);             \
  (row2) = __builtin_ia32_movlhps (__t2, __t3);             \
  (row3) = __builtin_ia32_movhlps (__t3, __t2);             \
} while (0)

So... what's causing my compiler errors? I don't redefine anything here, that I know of. This exact same code compiled and ran perfectly well when I was using MSVC.

回答1:

You need to change:

_MM_TRANSPOSE4_PS(r, u, t, _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));

to:

__m128 v = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
_MM_TRANSPOSE4_PS(r, u, t, v);

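since this is an in-place transpose: the macro assigns its results back to the same four arguments (e.g. `(row3) = ...`), so each argument must be an assignable lvalue such as a named `__m128` variable, not the temporary returned by `_mm_setr_ps`.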

回答2:

MSVC uses its own definition:

/* MSVC's definition of _MM_TRANSPOSE4_PS.
 *
 * The first four _mm_shuffle_ps calls combine row0/row1 and row2/row3
 * into tmp0..tmp3; the last four combine those temporaries and assign
 * the results back to (row0)..(row3).  Each rowN therefore appears as
 * the left operand of an assignment, so the macro is meant to be given
 * assignable objects.  Note the body is a bare { } block rather than
 * do { } while (0).
 */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) {                 \
            __m128 tmp3, tmp2, tmp1, tmp0;                          \
                                                                    \
            tmp0   = _mm_shuffle_ps((row0), (row1), 0x44);          \
            tmp2   = _mm_shuffle_ps((row0), (row1), 0xEE);          \
            tmp1   = _mm_shuffle_ps((row2), (row3), 0x44);          \
            tmp3   = _mm_shuffle_ps((row2), (row3), 0xEE);          \
                                                                    \
            (row0) = _mm_shuffle_ps(tmp0, tmp1, 0x88);              \
            (row1) = _mm_shuffle_ps(tmp0, tmp1, 0xDD);              \
            (row2) = _mm_shuffle_ps(tmp2, tmp3, 0x88);              \
            (row3) = _mm_shuffle_ps(tmp2, tmp3, 0xDD);              \
        }

With the call from the question, the last line of this macro expands to `(_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f)) = _mm_shuffle_ps(tmp2, tmp3, 0xDD);`, which compiles just fine in MSVC, whereas the equivalent assignment in GCC's macro fails with the lvalue error. I'm not sure why MSVC allows this.

I looked at the assembly output of this code in MSVC 2013:

#include <immintrin.h>
#include <stdio.h>
int main() 
{
    // Reduced test case: performs by hand the kind of assignment the macro call produces.
    __m128 rows[4];
    //rows[0] = _mm_setr_ps( 1, 2, 3, 4);
    //rows[1] = _mm_setr_ps( 5, 6, 7, 8);
    rows[2] = _mm_setr_ps( 9,10,11,12);
    rows[3] = _mm_setr_ps(13,14,15,16);

    //_MM_TRANSPOSE4_PS(rows[0],rows[1],rows[2],rows[3]);
    //_MM_TRANSPOSE4_PS(rows[0],rows[1],rows[2],_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
    rows[2] = _mm_shuffle_ps(rows[2], rows[3], 0x88); // source line 14: ordinary assignment to an array element
    _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f) = _mm_shuffle_ps(rows[2],rows[3], 0XDD); // source line 15: left operand is a call result, not a variable
}

Here is the relevant assembly code (the `; Line N` markers refer to source lines of the program above):

; Line 14
; rows[2] = _mm_shuffle_ps(rows[2], rows[3], 0x88): the shuffle result is
; written to temporary $T6 and then copied from $T6 back into rows[2].
    mov eax, 16
    imul    rax, 3
    mov ecx, 16
    imul    rcx, 2
    movups  xmm0, XMMWORD PTR rows$[rsp+rcx]
    shufps  xmm0, XMMWORD PTR rows$[rsp+rax], 136   ; 00000088H
    movaps  XMMWORD PTR $T6[rsp], xmm0
    mov eax, 16
    imul    rax, 2
    movaps  xmm0, XMMWORD PTR $T6[rsp]
    movups  XMMWORD PTR rows$[rsp+rax], xmm0
; Line 15
; The shuffle result is stored to temporary $T8.  The _mm_setr_ps constant is
; then loaded into temporary $T7, and $T7 is immediately overwritten with $T8.
; Nothing in this sequence writes to rows$.
    mov eax, 16
    imul    rax, 3
    mov ecx, 16
    imul    rcx, 2
    movups  xmm0, XMMWORD PTR rows$[rsp+rcx]
    shufps  xmm0, XMMWORD PTR rows$[rsp+rax], 221   ; 000000ddH
    movaps  XMMWORD PTR $T8[rsp], xmm0
    movaps  xmm0, XMMWORD PTR __xmm@3f800000000000000000000000000000
    movaps  XMMWORD PTR $T7[rsp], xmm0
    movaps  xmm0, XMMWORD PTR $T8[rsp]
    movaps  XMMWORD PTR $T7[rsp], xmm0

来源：https://stackoverflow.com/questions/25360355/mm-transpose4-ps-causes-compiler-errors-in-gcc

标签

c++

visual-c++

gcc

sse

intrinsics