This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug c/79938] gcc unnecessarily spills xmm register to stack when inserting vector items
- From: "postmaster at raasu dot org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: Tue, 07 Mar 2017 15:38:04 +0000
- Subject: [Bug c/79938] gcc unnecessarily spills xmm register to stack when inserting vector items
- Auto-submitted: auto-generated
- References: <bug-79938-4@http.gcc.gnu.org/bugzilla/>
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79938
--- Comment #3 from postmaster at raasu dot org ---
With -mssse3 instead of -msse4.1, the issue gets even worse:
---
...
pxor %xmm1, %xmm1
movl $.LC0, %esi
movl $1, %edi
movd %eax, %xmm0
movdqa %xmm0, %xmm4
pshufb %xmm1, %xmm4
movaps %xmm4, (%rsp)
movzbl (%rsp), %eax
movaps %xmm4, 224(%rsp)
movzbl 225(%rsp), %edx
movaps %xmm4, 208(%rsp)
movaps %xmm4, 192(%rsp)
movaps %xmm4, 176(%rsp)
addl %edx, %eax
movzbl 210(%rsp), %edx
movaps %xmm4, 160(%rsp)
movaps %xmm4, 144(%rsp)
movaps %xmm4, 128(%rsp)
movaps %xmm4, 112(%rsp)
addl %edx, %eax
movzbl 195(%rsp), %edx
movaps %xmm4, 96(%rsp)
movzbl 105(%rsp), %ecx
movaps %xmm4, 80(%rsp)
movaps %xmm4, 64(%rsp)
movaps %xmm4, 48(%rsp)
addl %edx, %eax
movzbl 165(%rsp), %edx
movaps %xmm4, 32(%rsp)
movd %eax, %xmm0
movzbl 180(%rsp), %eax
movaps %xmm4, 16(%rsp)
movaps %xmm4, 240(%rsp)
addl %edx, %eax
movzbl 150(%rsp), %edx
addl %edx, %eax
movzbl 135(%rsp), %edx
addl %eax, %edx
movzbl 120(%rsp), %eax
movd %edx, %xmm6
punpckldq %xmm6, %xmm0
addl %ecx, %eax
movzbl 90(%rsp), %ecx
addl %ecx, %eax
movzbl 75(%rsp), %ecx
addl %ecx, %eax
movzbl 45(%rsp), %ecx
movd %eax, %xmm1
movzbl 60(%rsp), %eax
addl %ecx, %eax
movzbl 30(%rsp), %ecx
addl %ecx, %eax
movzbl 15(%rsp), %ecx
addl %ecx, %eax
movd %eax, %xmm5
xorl %eax, %eax
punpckldq %xmm5, %xmm1
punpcklqdq %xmm1, %xmm0
movdqa %xmm0, %xmm2
movd %xmm0, %edx
pshufd $255, %xmm0, %xmm3
punpckhdq %xmm0, %xmm2
pshufd $85, %xmm0, %xmm1
...
---
Notice all the lines starting with "movaps %xmm4,".
The same register contents are stored to 16 distinct stack slots, polluting the stack with identical copies.