问题
I'm compiling my math library in GCC instead of MSVC for the first time and going through all the little errors, and I've hit one that simply makes no sense:
Line 284: error: lvalue required as left operand of assignment
What's on line 284? this:
_MM_TRANSPOSE4_PS(r, u, t, _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
(r, u, and t are all instances of __m128.)
Those familiar with xmmintrin.h will be aware that _MM_TRANSPOSE4_PS isn't actually a function, but rather a macro, which GCC defines as:
/* Transpose the 4x4 matrix composed of row[0-3].
   The transpose is done in place: each of row0..row3 is first copied into a
   local (__r0..__r3) and is then the target of an assignment at the end of
   the macro.  Every argument must therefore be an assignable lvalue; passing
   a temporary (e.g. the result of a function call) makes the corresponding
   "(rowN) = ..." line fail with "lvalue required as left operand of
   assignment".  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
__v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \
__v4sf __t0 = __builtin_ia32_unpcklps (__r0, __r1); \
__v4sf __t1 = __builtin_ia32_unpcklps (__r2, __r3); \
__v4sf __t2 = __builtin_ia32_unpckhps (__r0, __r1); \
__v4sf __t3 = __builtin_ia32_unpckhps (__r2, __r3); \
(row0) = __builtin_ia32_movlhps (__t0, __t1); \
(row1) = __builtin_ia32_movhlps (__t1, __t0); \
(row2) = __builtin_ia32_movlhps (__t2, __t3); \
(row3) = __builtin_ia32_movhlps (__t3, __t2); \
} while (0)
So... what's causing my compiler errors? I don't redefine anything here, that I know of. This exact same code compiled and ran perfectly well when I was using MSVC.
回答1:
You need to change:
_MM_TRANSPOSE4_PS(r, u, t, _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
to:
__m128 v = _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f);
_MM_TRANSPOSE4_PS(r, u, t, v);
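This is necessary because _MM_TRANSPOSE4_PS is an in-place transpose: the 4 input vectors are also used for output, so the macro assigns to each of its arguments. Every argument must therefore be an assignable variable (an lvalue), and the temporary returned by _mm_setr_ps(...) is not one.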
回答2:
MSVC uses its own definition:
/* MSVC's definition of _MM_TRANSPOSE4_PS.
   Works in two rounds of _mm_shuffle_ps: the first round combines row0 with
   row1 and row2 with row3 into tmp0..tmp3, the second round combines those
   temporaries and writes the results back into row0..row3.
   All four arguments appear on the left-hand side of an assignment, so they
   act as outputs as well as inputs.
   Note that the body is a bare { ... } block rather than do { ... } while (0). */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) { \
__m128 tmp3, tmp2, tmp1, tmp0; \
\
tmp0 = _mm_shuffle_ps((row0), (row1), 0x44); \
tmp2 = _mm_shuffle_ps((row0), (row1), 0xEE); \
tmp1 = _mm_shuffle_ps((row2), (row3), 0x44); \
tmp3 = _mm_shuffle_ps((row2), (row3), 0xEE); \
\
(row0) = _mm_shuffle_ps(tmp0, tmp1, 0x88); \
(row1) = _mm_shuffle_ps(tmp0, tmp1, 0xDD); \
(row2) = _mm_shuffle_ps(tmp2, tmp3, 0x88); \
(row3) = _mm_shuffle_ps(tmp2, tmp3, 0xDD); \
}
With this definition, the last line of the macro expands to:
_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f) = _mm_shuffle_ps(tmp2, tmp3, 0xDD);
This compiles just fine in MSVC, but the equivalent assignment in GCC's version of the macro fails with the lvalue error. I'm not sure why MSVC allows this.
I looked at the assembly output of this code in MSVC2013
#include <immintrin.h>
#include <stdio.h>
/* Reduced MSVC-only experiment: instead of invoking _MM_TRANSPOSE4_PS, it
   writes out two shuffle assignments by hand -- one to a real array element
   and one to the temporary returned by _mm_setr_ps -- so that the code the
   compiler generates for each can be compared.  rows[0] and rows[1] are
   intentionally left unset; only rows[2] and rows[3] are used. */
int main()
{
__m128 rows[4];
//rows[0] = _mm_setr_ps( 1, 2, 3, 4);
//rows[1] = _mm_setr_ps( 5, 6, 7, 8);
rows[2] = _mm_setr_ps( 9,10,11,12);
rows[3] = _mm_setr_ps(13,14,15,16);
//_MM_TRANSPOSE4_PS(rows[0],rows[1],rows[2],rows[3]);
//_MM_TRANSPOSE4_PS(rows[0],rows[1],rows[2],_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f));
/* Ordinary case: the assignment target is a named object (rows[2]). */
rows[2] = _mm_shuffle_ps(rows[2], rows[3], 0x88);
/* Case under investigation: the left-hand side is the result of a call, not
   a variable.  This is the form GCC rejects with "lvalue required as left
   operand of assignment". */
_mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f) = _mm_shuffle_ps(rows[2],rows[3], 0XDD);
}
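Here is the relevant assembly code. The "; Line 14" block is the assignment to rows[2] (shuffle constant 0x88), and the "; Line 15" block is the assignment whose left-hand side is _mm_setr_ps(...) (shuffle constant 0xDD):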
; MSVC listing (Intel operand order: destination first) for the two shuffle
; statements in main(). $T6/$T7/$T8 are compiler-generated stack temporaries.
; Line 14
; rax = 16*3 and rcx = 16*2: byte offsets of rows[3] and rows[2]
; (rows is an array of 16-byte __m128 values).
mov eax, 16
imul rax, 3
mov ecx, 16
imul rcx, 2
; xmm0 = shuffle(rows[2], rows[3], 0x88); the result is first spilled to $T6.
movups xmm0, XMMWORD PTR rows$[rsp+rcx]
shufps xmm0, XMMWORD PTR rows$[rsp+rax], 136 ; 00000088H
movaps XMMWORD PTR $T6[rsp], xmm0
; Copy $T6 into rows[2] (offset 16*2): the store lands in the named array.
mov eax, 16
imul rax, 2
movaps xmm0, XMMWORD PTR $T6[rsp]
movups XMMWORD PTR rows$[rsp+rax], xmm0
; Line 15
; Same address computation as above: rax -> rows[3], rcx -> rows[2].
mov eax, 16
imul rax, 3
mov ecx, 16
imul rcx, 2
; xmm0 = shuffle(rows[2], rows[3], 0xDD); the result is spilled to $T8.
movups xmm0, XMMWORD PTR rows$[rsp+rcx]
shufps xmm0, XMMWORD PTR rows$[rsp+rax], 221 ; 000000ddH
movaps XMMWORD PTR $T8[rsp], xmm0
; Load a 128-bit constant and store it in temporary $T7. The constant appears
; to be the _mm_setr_ps(0, 0, 0, 1) value: 0x3f800000 (1.0f) in the top 32
; bits, zeros below.
movaps xmm0, XMMWORD PTR __xmm@3f800000000000000000000000000000
movaps XMMWORD PTR $T7[rsp], xmm0
; Overwrite that same temporary with the shuffle result from $T8. The
; "assignment" therefore targets $T7 only; nothing in this listing reads it
; back or writes to rows[].
movaps xmm0, XMMWORD PTR $T8[rsp]
movaps XMMWORD PTR $T7[rsp], xmm0
来源:https://stackoverflow.com/questions/25360355/mm-transpose4-ps-causes-compiler-errors-in-gcc