Much like http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19274, gcc pays an unnecessary visit to the stack for _mm_set1_epi32 when its argument is a memory reference.

With a CVS snapshot a couple of days old, compiling the following with -O2 -march=k8 -mfpmath=sse:

#include <xmmintrin.h>
__m128i eliminated(const int i) { return _mm_set1_epi32(i); }
__m128i not_eliminated(const int *i) { return _mm_set1_epi32(*i); }
__m128i not_eliminated_bis(const int &i) { return _mm_set1_epi32(i); }
int main() { return 0; }

gives:

00401086 <eliminated(int)>:
  401086: 66 0f 6e 44 24 04    movd   0x4(%esp),%xmm0
  40108c: 66 0f 70 c0 00       pshufd $0x0,%xmm0,%xmm0
  401091: c3                   ret

00401092 <not_eliminated(int const*)>:
  401092: 83 ec 04             sub    $0x4,%esp
  401095: 8b 44 24 08          mov    0x8(%esp),%eax
  401099: 8b 00                mov    (%eax),%eax
  40109b: 89 04 24             mov    %eax,(%esp)
  40109e: 66 0f 6e 0c 24       movd   (%esp),%xmm1
  4010a3: 66 0f 70 c1 00       pshufd $0x0,%xmm1,%xmm0
  4010a8: 83 c4 04             add    $0x4,%esp
  4010ab: c3                   ret
This is the .t69.final_cleanup dump:

;; Function long long int __vector__ not_eliminated_bis(const int&) (_Z18not_eliminated_bisRKi)

long long int __vector__ not_eliminated_bis(const int&) (i)
{
  int __q0;

<bb 0>:
  __q0 = *i;
  return (__m128i) {__q0, __q0, __q0, __q0};
}

;; Function int main() (main)

int main() ()
{
<bb 0>:
  return 0;
}

;; Function long long int __vector__ eliminated(int) (_Z10eliminatedi)

long long int __vector__ eliminated(int) (i)
{
<bb 0>:
  return (__m128i) {i, i, i, i};
}

;; Function long long int __vector__ not_eliminated(const int*) (_Z14not_eliminatedPKi)

long long int __vector__ not_eliminated(const int*) (i)
{
  int __q0;

<bb 0>:
  __q0 = *i;
  return (__m128i) {__q0, __q0, __q0, __q0};
}

The assignment __q0 = *i is still present in every function where we expect this temporary to be eliminated.
Fixed by http://gcc.gnu.org/ml/gcc-patches/2005-02/msg00127.html; movd now loads directly from memory, with no intermediate stack slot:

00000000 <_Z18not_eliminated_bisRKi>:
   0: 55                   push   %ebp
   1: 89 e5                mov    %esp,%ebp
   3: 8b 45 08             mov    0x8(%ebp),%eax
   6: 66 0f 6e 00          movd   (%eax),%xmm0
   a: 66 0f 70 c0 00       pshufd $0x0,%xmm0,%xmm0
   f: 5d                   pop    %ebp
  10: c3                   ret

00000032 <_Z14not_eliminatedPKi>:
  32: 55                   push   %ebp
  33: 89 e5                mov    %esp,%ebp
  35: 8b 45 08             mov    0x8(%ebp),%eax
  38: 66 0f 6e 00          movd   (%eax),%xmm0
  3c: 66 0f 70 c0 00       pshufd $0x0,%xmm0,%xmm0
  41: 5d                   pop    %ebp
  42: c3                   ret