[Bug tree-optimization/53355] Autovectorization of a simple loop could be improved.
rguenth at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Tue May 15 13:46:00 GMT 2012
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53355
--- Comment #3 from Richard Guenther <rguenth at gcc dot gnu.org> 2012-05-15 13:19:06 UTC ---
With the patch we now emit
# ----------------------------------------------------------------------
# foo -- compiler-generated (GCC) x86-64 code, AT&T syntax.
#
# Adds a constant to each of 100000 consecutive doubles starting at
# (%rdi), in three phases:
#   1. peel    : 0 or 1 scalar element, so the vector loop starts on a
#                16-byte boundary
#   2. .L6     : two doubles per iteration with aligned movapd/addpd
#   3. .L5     : scalar loop over whatever the vector loop left over
#
# In:    rdi = base address of the array.  Only bit 3 of the address is
#              examined, so the aligned vector accesses rely on rdi being
#              at least 8-byte aligned.
# Out:   nothing is set up as a result; rax is left holding a loop byte
#        offset, so presumably the source function returns void
#        -- TODO confirm against the C test case.
# Clobb: rax, rcx, rdx, rsi, r8, r9, r10, r11, xmm0, xmm1, flags
#
# NOTE(review): .LC0 (scalar addend) and .LC1 (16-byte packed addend)
# are defined outside this listing; presumably they hold the same
# constant in scalar and 2-wide form -- confirm.
# NOTE(review): argument in rdi suggests the SysV AMD64 ABI -- confirm.
# ----------------------------------------------------------------------
foo:
.LFB0:
.cfi_startproc
# --- Phase 1: alignment peel -------------------------------------------
# Shift bit 3 of the pointer up to bit 63, then smear it across the
# register: rax = -1 if bit 3 of rdi is set, else 0.
movq %rdi, %rax
salq $60, %rax
sarq $63, %rax
# edx = peel count: 1 if bit 3 of rdi is set (address is 8 mod 16), else 0.
movq %rax, %rdx
andl $1, %edx
# andl above already set ZF from this same value; this test repeats it.
testl %edx, %edx
je .L7
# Peel path: process element 0 with scalar code.
movsd .LC0(%rip), %xmm0
movl $99999, %r10d       # r10d = elements remaining after the peel
movl $1, %r11d           # r11d = index of the first element after the peel
addsd (%rdi), %xmm0
movsd %xmm0, (%rdi)
# --- Phase 2 setup -------------------------------------------------------
# On entry (from either path): edx = peel count (0/1), rax = 0 or -1,
# r10d = elements not yet processed, r11d = index of the next element.
.L2:
movl $100000, %r8d
andl $1, %eax            # eax = peel count again (same value as edx)
subl %edx, %r8d          # r8d = 100000 - peel count
movapd .LC1(%rip), %xmm1 # xmm1 = packed addend, loop-invariant
movl %r8d, %esi
leaq (%rdi,%rax,8), %rcx # rcx = address of the first vector-loop element
xorl %edx, %edx          # edx = vector iteration counter
shrl %esi                # esi = number of 2-element vector iterations
xorl %eax, %eax          # rax = byte offset from rcx
leal (%rsi,%rsi), %r9d   # r9d = elements covered by the vector loop
# Align the loop head to 16 bytes if that costs at most 10 bytes of
# padding, otherwise settle for 8-byte alignment.
.p2align 4,,10
.p2align 3
# --- Phase 2: vector loop, 2 doubles (16 bytes) per iteration -----------
# Bottom-tested: the body runs at least once.  movapd requires
# rcx + rax to be 16-byte aligned.
.L6:
movapd (%rcx,%rax), %xmm0
addl $1, %edx
addpd %xmm1, %xmm0
movapd %xmm0, (%rcx,%rax)
addq $16, %rax
cmpl %esi, %edx
jb .L6                   # unsigned: loop while edx < esi
# --- Phase 3 setup: scalar tail ------------------------------------------
subl %r9d, %r10d         # r10d = elements left after the vector loop
cmpl %r9d, %r8d          # did the vector loop cover everything?
leal (%r11,%r9), %edx    # edx = index of first leftover element (lea keeps flags)
je .L1                   # flags from the cmpl above: nothing left over
leal -1(%r10), %ecx      # ecx = leftover count - 1
movslq %edx, %rdx
leaq 0(,%rdx,8), %rax    # rax = byte offset of the first leftover element
movsd .LC0(%rip), %xmm1  # xmm1 = scalar addend for the tail loop
leaq 1(%rdx,%rcx), %rdx  # rdx = first leftover index + leftover count
salq $3, %rdx            # rdx = end byte offset (exclusive)
.p2align 4,,10
.p2align 3
# --- Phase 3: scalar tail loop, one double per iteration ----------------
.L5:
movsd (%rdi,%rax), %xmm0
addsd %xmm1, %xmm0
movsd %xmm0, (%rdi,%rax)
addq $8, %rax
cmpq %rdx, %rax
jne .L5
# Common exit.  "rep" followed by "ret" is a rep-prefixed return;
# NOTE(review): GCC is known to emit this form for AMD K8/K10 branch
# prediction when the ret is a jump target -- confirm it applies here.
.L1:
rep
ret
# No-peel path (bit 3 of rdi clear): all 100000 elements are still
# pending and the next element index is 0.  edx and rax are both 0 here.
.L7:
movl $100000, %r10d
xorl %r11d, %r11d
jmp .L2
More information about the Gcc-bugs
mailing list