[Bug regression/54390] [AVX] FAIL: gcc.dg/vect/no-tree-sra-bb-slp-pr50730.c
hjl.tools at gmail dot com
gcc-bugzilla@gcc.gnu.org
Tue Aug 28 13:21:00 GMT 2012
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54390
--- Comment #1 from H.J. Lu <hjl.tools at gmail dot com> 2012-08-28 13:21:29 UTC ---
Without AVX, we got:
xorps %xmm4, %xmm4
shufps $0xe4, %xmm4, %xmm0
movlhps %xmm1, %xmm0
movaps %xmm4, %xmm1
movaps %xmm2, %xmm4
shufps $0xe4, %xmm1, %xmm4
movaps %xmm4, %xmm1
movlhps %xmm3, %xmm1
addps %xmm1, %xmm0
movhps %xmm0, -16(%rsp)
movq -16(%rsp), %rax
movlps %xmm0, -24(%rsp)
movq %rax, -48(%rsp)
movq -48(%rsp), %xmm1
movq -24(%rsp), %xmm0
ret
After adding -mtune=corei7, we got:
movq %xmm1, -16(%rsp)
movq %xmm0, -24(%rsp)
movq %xmm2, -40(%rsp)
movups -24(%rsp), %xmm0
movq %xmm3, -32(%rsp)
movups -40(%rsp), %xmm1
addps %xmm1, %xmm0
movups %xmm0, -24(%rsp)
movq -16(%rsp), %rax
movq -24(%rsp), %xmm0
movd %rax, %xmm1
ret
With AVX, we got:
vmovq %xmm0, -24(%rsp)
vmovq %xmm1, -16(%rsp)
vmovq %xmm2, -40(%rsp)
vmovq %xmm3, -32(%rsp)
vmovups -24(%rsp), %xmm1
vmovups -40(%rsp), %xmm0
vaddps %xmm0, %xmm1, %xmm0
vmovups %xmm0, -24(%rsp)
movq -16(%rsp), %rax
movq %rax, -48(%rsp)
vmovq -24(%rsp), %xmm0
vmovq -48(%rsp), %xmm1
ret
I think both -mavx and -mtune=corei7 enable unaligned loads/stores, which
helps the vectorizer.
More information about the Gcc-bugs
mailing list