When compiling with -O1 or higher, gcc uses the wrong register for the inline assembly below. In my actual use case, it even emits "bswap %eax" directly followed by "xorl %eax, %eax", so the swapped value is thrown away immediately. With -m32, it seems to generate correct code. == CODE FILES == asgard:/tmp$ cat x.c #include <stdint.h> uint32_t x() { return 0x11223344; } asgard:/tmp$ cat test.c #include <stdint.h> #include <stdio.h> extern uint32_t x(); static inline __attribute__((always_inline)) bswap32(uint32_t i) { asm("bswap %0" : "=q"(i) : "q"(i)); return i; } int main() { printf("%08X\n", bswap32(x())); return 0; } == OUTPUT == asgard:/tmp$ gcc test.c x.c asgard:/tmp$ ./a.out 44332211 asgard:/tmp$ gcc -O1 test.c x.c asgard:/tmp$ ./a.out 381E625D == ASSEMBLY OUTPUT (only important parts) == asgard:/tmp$ gcc -S test.c asgard:/tmp$ cat test.s […] movl $0, %eax call x movl %eax, -4(%rbp) movl -4(%rbp), %eax #APP # 9 "test.c" 1 bswap %eax # 0 "" 2 #NO_APP movl %eax, -4(%rbp) movl -4(%rbp), %eax movl %eax, %edx movl $.LC0, %eax movl %edx, %esi movq %rax, %rdi movl $0, %eax call printf […] asgard:/tmp$ gcc -S -O1 test.c asgard:/tmp$ cat test.s […] movl $0, %eax call x #APP # 9 "test.c" 1 bswap %edx # 0 "" 2 #NO_APP movl $.LC0, %esi movl $1, %edi movl $0, %eax call __printf_chk […]
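Well, asm("bswap %0" : "=q"(i) : "q"(i)); is wrong: this is not a compiler bug. The template only references %0 (the output operand), and because the output "=q" and the input "q" are independent operands, the compiler is free to place them in different registers. With optimization it does so, and bswap then operates on a register that was never loaded with the input value. You probably want asm("bswap %0" : "=q"(i) : "0"(i)); where the matching constraint "0" forces the input into the same register as output operand 0 (a single read-write operand, "+q"(i), is equivalent).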