问题
I'm trying to learn x86-64 inline assembly and decided to implement this very simple swap method that simply orders a
and b
in ascending order:
#include <stdio.h>
void swap(int* a, int* b)
{
asm(".intel_syntax noprefix");
asm("mov eax, DWORD PTR [rdi]");
asm("mov ebx, DWORD PTR [rsi]");
asm("cmp eax, ebx");
asm("jle .L1");
asm("mov DWORD PTR [rdi], ebx");
asm("mov DWORD PTR [rsi], eax");
asm(".L1:");
asm(".att_syntax noprefix");
}
int main()
{
int input[3];
scanf("%d%d%d", &input[0], &input[1], &input[2]);
swap(&input[0], &input[1]);
swap(&input[1], &input[2]);
swap(&input[0], &input[1]);
printf("%d %d %d\n", input[0], input[1], input[2]);
return 0;
}
The above code works as expected when I run it with this command:
> gcc main.c
> ./a.out
> 3 2 1
> 1 2 3
However, as soon as I turn optimazation on I get the following error messages:
> gcc -O2 main.c
> main.c: Assembler messages:
> main.c:12: Error: symbol `.L1' is already defined
> main.c:12: Error: symbol `.L1' is already defined
> main.c:12: Error: symbol `.L1' is already defined
If I've understood it correctly, this is because gcc
tries to inline my swap
function when optimization is turned on, causing the label .L1
to be defined multiple times in the assembly file.
I've tried to find an answer to this problem, but nothing seems to work. In this previusly asked question it's suggested to use local labels instead, and I've tried that aswell:
#include <stdio.h>
void swap(int* a, int* b)
{
asm(".intel_syntax noprefix");
asm("mov eax, DWORD PTR [rdi]");
asm("mov ebx, DWORD PTR [rsi]");
asm("cmp eax, ebx");
asm("jle 1f");
asm("mov DWORD PTR [rdi], ebx");
asm("mov DWORD PTR [rsi], eax");
asm("1:");
asm(".att_syntax noprefix");
}
But when trying to run the program I now get a segmentation fault instead:
> gcc -O2 main.c
> ./a.out
> 3 2 1
> Segmentation fault
I also tried the suggested solution to this previusly asked question and changed the name .L1
to CustomLabel1
in case there would be a name collision, but it still gives me the old error:
> gcc -O2 main.c
> main.c: Assembler messages:
> main.c:12: Error: symbol `CustomLabel1' is already defined
> main.c:12: Error: symbol `CustomLabel1' is already defined
> main.c:12: Error: symbol `CustomLabel1' is already defined
Finally I also tried this suggestion:
void swap(int* a, int* b)
{
asm(".intel_syntax noprefix");
asm("mov eax, DWORD PTR [rdi]");
asm("mov ebx, DWORD PTR [rsi]");
asm("cmp eax, ebx");
asm("jle label%=");
asm("mov DWORD PTR [rdi], ebx");
asm("mov DWORD PTR [rsi], eax");
asm("label%=:");
asm(".att_syntax noprefix");
}
But then I get these errors instead:
main.c: Assembler messages:
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
main.c:9: Error: invalid character '=' in operand 1
main.c:12: Error: invalid character '%' in mnemonic
So, my question is:
How can I use labels in inline assembly?
This is the disassemble output for the optimized version:
> gcc -O2 -S main.c
.file "main.c"
.section .text.unlikely,"ax",@progbits
.LCOLDB0:
.text
.LHOTB0:
.p2align 4,,15
.globl swap
.type swap, @function
swap:
.LFB23:
.cfi_startproc
#APP
# 5 "main.c" 1
.intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
jle 1f
# 0 "" 2
# 10 "main.c" 1
mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
1:
# 0 "" 2
# 13 "main.c" 1
.att_syntax noprefix
# 0 "" 2
#NO_APP
ret
.cfi_endproc
.LFE23:
.size swap, .-swap
.section .text.unlikely
.LCOLDE0:
.text
.LHOTE0:
.section .rodata.str1.1,"aMS",@progbits,1
.LC1:
.string "%d%d%d"
.LC2:
.string "%d %d %d\n"
.section .text.unlikely
.LCOLDB3:
.section .text.startup,"ax",@progbits
.LHOTB3:
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB24:
.cfi_startproc
subq $40, %rsp
.cfi_def_cfa_offset 48
movl $.LC1, %edi
movq %fs:40, %rax
movq %rax, 24(%rsp)
xorl %eax, %eax
leaq 8(%rsp), %rcx
leaq 4(%rsp), %rdx
movq %rsp, %rsi
call __isoc99_scanf
#APP
# 5 "main.c" 1
.intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
jle 1f
# 0 "" 2
# 10 "main.c" 1
mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
1:
# 0 "" 2
# 13 "main.c" 1
.att_syntax noprefix
# 0 "" 2
# 5 "main.c" 1
.intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
jle 1f
# 0 "" 2
# 10 "main.c" 1
mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
1:
# 0 "" 2
# 13 "main.c" 1
.att_syntax noprefix
# 0 "" 2
# 5 "main.c" 1
.intel_syntax noprefix
# 0 "" 2
# 6 "main.c" 1
mov eax, DWORD PTR [rdi]
# 0 "" 2
# 7 "main.c" 1
mov ebx, DWORD PTR [rsi]
# 0 "" 2
# 8 "main.c" 1
cmp eax, ebx
# 0 "" 2
# 9 "main.c" 1
jle 1f
# 0 "" 2
# 10 "main.c" 1
mov DWORD PTR [rdi], ebx
# 0 "" 2
# 11 "main.c" 1
mov DWORD PTR [rsi], eax
# 0 "" 2
# 12 "main.c" 1
1:
# 0 "" 2
# 13 "main.c" 1
.att_syntax noprefix
# 0 "" 2
#NO_APP
movl 8(%rsp), %r8d
movl 4(%rsp), %ecx
movl $.LC2, %esi
movl (%rsp), %edx
xorl %eax, %eax
movl $1, %edi
call __printf_chk
movq 24(%rsp), %rsi
xorq %fs:40, %rsi
jne .L6
xorl %eax, %eax
addq $40, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 8
ret
.L6:
.cfi_restore_state
call __stack_chk_fail
.cfi_endproc
.LFE24:
.size main, .-main
.section .text.unlikely
.LCOLDE3:
.section .text.startup
.LHOTE3:
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits
回答1:
There are plenty of tutorials - including this one (probably the best I know of), and some info on operand size modifiers.
Here's the first implementation - swap_2
:
void swap_2 (int *a, int *b)
{
int tmp0, tmp1;
__asm__ volatile (
"movl (%0), %k2\n\t" /* %2 (tmp0) = (*a) */
"movl (%1), %k3\n\t" /* %3 (tmp1) = (*b) */
"cmpl %k3, %k2\n\t"
"jle %=f\n\t" /* if (%2 <= %3) (at&t!) */
"movl %k3, (%0)\n\t"
"movl %k2, (%1)\n\t"
"%=:\n\t"
: "+r" (a), "+r" (b), "=r" (tmp0), "=r" (tmp1) :
: "memory" /* "cc" */ );
}
A few notes:
volatile
(or__volatile__
) is required, as the compiler only 'sees'(a)
and(b)
(and doesn't 'know' you're potentially exchanging their contents), and would otherwise be free to optimize the wholeasm
statement away -tmp0
andtmp1
would otherwise be considered unused variables too."+r"
means that this is both an input and output that may be modified; only it isn't in this case, and they could strictly be input only - more on that in a bit...The 'l' suffix on 'movl' isn't really necessary; neither is the 'k' (32-bit) length modifier for the registers. Since you're using the Linux (ELF) ABI, an
int
is 32 bits for both IA32 and x86-64 ABIs.The
%=
token generates a unique label for us. BTW, the jump syntax<label>f
means a forward jump, and<label>b
means back.For correctness, we need
"memory"
as the compiler has no way of knowing if values from dereferenced pointers have been changed. This may be an issue in more complex inline asm surrounded by C code, as it invalidates all currently held values in memory - and is often a sledgehammer approach. Appearing at the end of a function in this fashion, it's not going to be an issue - but you can read more on it here (see: Clobbers)The
"cc"
flags register clobber is detailed in the same section. on x86, it does nothing. Some writers include it for clarity, but since practically all non-trivialasm
statements affect the flags register, it's just assumed to be clobbered by default.
Here's the C implementation - swap_1
:
void swap_1 (int *a, int *b)
{
if (*a > *b)
{
int t = *a; *a = *b; *b = t;
}
}
Compiling with gcc -O2
for x86-64 ELF, I get identical code. Just a bit of luck that the compiler chose tmp0
and tmp1
to use the same free registers for temps... cutting out the noise, like the .cfi directives, etc., gives:
swap_2:
movl (%rdi), %eax
movl (%rsi), %edx
cmpl %edx, %eax
jle 21f
movl %edx, (%rdi)
movl %eax, (%rsi)
21:
ret
As stated, the swap_1
code was identical, except that the compiler chose .L1
for its jump label. Compiling the code with -m32
generated the same code (apart from using the tmp registers in a different order). There's more overhead, as the IA32 ELF ABI passes parameters on the stack, whereas the x86-64 ABI passes the first two parameters in %rdi
and %rsi
respectively.
Treating (a)
and (b)
as input only - swap_3
:
void swap_3 (int *a, int *b)
{
int tmp0, tmp1;
__asm__ volatile (
"mov (%[a]), %[x]\n\t" /* x = (*a) */
"mov (%[b]), %[y]\n\t" /* y = (*b) */
"cmp %[y], %[x]\n\t"
"jle %=f\n\t" /* if (x <= y) (at&t!) */
"mov %[y], (%[a])\n\t"
"mov %[x], (%[b])\n\t"
"%=:\n\t"
: [x] "=&r" (tmp0), [y] "=&r" (tmp1)
: [a] "r" (a), [b] "r" (b) : "memory" /* "cc" */ );
}
I've done away with the 'l' suffix and 'k' modifiers here, because they're not needed. I've also used the 'symbolic name' syntax for operands, as it often helps to make the code more readable.
(a)
and (b)
are now indeed input-only registers. So what's the "=&r"
syntax mean? The &
denotes an early clobber operand. In this case, the value may be written to before we finish using the input operands, and therefore the compiler must choose registers different from those selected for the input operands.
Once again, the compiler generates identical code as it did for swap_1
and swap_2
.
I wrote way more than I planned on this answer, but as you can see, it's very difficult to maintain awareness of all the information the compiler must be made aware of, as well as the idiosyncrasies of each instruction set (ISA) and ABI.
回答2:
You cannot just put a bunch of asm
statements inline like that. The optimizer is free to re-order, duplicate, and drop them based on what constraints it knows. (In your case, it knows none.)
So firstly, you should consolidate the asm together, with proper read/write/clobber constraints. Secondly, there is a special asm goto
form that gives assembly to C-level labels.
void swap(int *a, int *b) {
int tmp1, tmp2;
asm(
"mov (%2), %0\n"
"mov (%3), %1\n"
: "=r" (tmp1), "=r" (tmp2)
: "r" (a), "r" (b)
: "memory" // pointer in register doesn't imply that the pointed-to memory has to be "in sync"
// or use "m" memory source operands to let the compiler pick the addressing mode
);
asm goto(
"cmp %1, %0\n"
"jle %l4\n"
"mov %1, (%2)\n"
"mov %0, (%3)\n"
:
: "r" (tmp1), "r" (tmp2), "r" (a), "r" (b)
: "cc", "memory"
: L1
);
L1:
return;
}
回答3:
You cannot assume values are in any particular register in your asm code -- you need to use constraints to tell gcc what values you want to read and write and get it to tell you which register they are in. The gcc docs tell you most of what you need to know, but are pretty dense. There are also tutorials out there that you can easily find with a web search (here or here)
来源:https://stackoverflow.com/questions/42681720/how-do-i-use-labels-in-gcc-inline-assembly