[Bug tree-optimization/88760] GCC unrolling is suboptimal
ktkachov at gcc dot gnu.org
gcc-bugzilla@gcc.gnu.org
Tue Jan 15 11:08:00 GMT 2019
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88760
--- Comment #8 from ktkachov at gcc dot gnu.org ---
btw it looks like ICC vectorises this as well as unrolling it:
..B1.14:
movl (%rcx,%rbx,4), %r15d
vmovsd (%rdi,%r15,8), %xmm2
movl 4(%rcx,%rbx,4), %r15d
vmovhpd (%rdi,%r15,8), %xmm2, %xmm3
movl 8(%rcx,%rbx,4), %r15d
vfmadd231pd (%r10,%rbx,8), %xmm3, %xmm0
vmovsd (%rdi,%r15,8), %xmm4
movl 12(%rcx,%rbx,4), %r15d
vmovhpd (%rdi,%r15,8), %xmm4, %xmm5
movl 16(%rcx,%rbx,4), %r15d
vfmadd231pd 16(%r10,%rbx,8), %xmm5, %xmm1
vmovsd (%rdi,%r15,8), %xmm6
movl 20(%rcx,%rbx,4), %r15d
vmovhpd (%rdi,%r15,8), %xmm6, %xmm7
movl 24(%rcx,%rbx,4), %r15d
vfmadd231pd 32(%r10,%rbx,8), %xmm7, %xmm0
vmovsd (%rdi,%r15,8), %xmm8
movl 28(%rcx,%rbx,4), %r15d
vmovhpd (%rdi,%r15,8), %xmm8, %xmm9
vfmadd231pd 48(%r10,%rbx,8), %xmm9, %xmm1
addq $8, %rbx
cmpq %r14, %rbx
jb ..B1.14
Is that something GCC could reasonably do?
More information about the Gcc-bugs
mailing list