This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug target/37364] [4.4 Regression] IRA generates inefficient code due to missing regmove pass
- From: "bonzini at gnu dot org" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 24 Oct 2008 10:11:10 -0000
- Subject: [Bug target/37364] [4.4 Regression] IRA generates inefficient code due to missing regmove pass
- References: <bug-37364-682@http.gcc.gnu.org/bugzilla/>
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
------- Comment #19 from bonzini at gnu dot org 2008-10-24 10:11 -------
Left = old, right = IRA.
It seems to me that the better register allocation of IRA gives the
post-regalloc scheduling pass much less freedom.
Intel guys, could you run SPEC with -O2 -fschedule-insns and -O2, both of them
using IRA?
.L4:
movsd (%esi,%eax,8), %xmm3    movl 12(%ebp), %edx
movsd (%ebx,%eax,8), %xmm4    movsd (%edx,%eax,8), %xmm7
movsd (%ecx,%eax,8), %xmm6    movl -44(%ebp), %edx
movl 12(%ebp), %edx           movsd %xmm7, -40(%ebp)
movsd (%edx,%eax,8), %xmm1    movsd (%edx,%eax,8), %xmm7
movl 16(%ebp), %edx           movsd %xmm7, -56(%ebp)
movapd %xmm1, %xmm0           movsd -40(%ebp), %xmm7
movsd (%edx,%eax,8), %xmm2    mulsd (%ebx,%eax,8), %xmm7
mulsd %xmm3, %xmm0            addsd %xmm7, %xmm6
movl 20(%ebp), %edx           movsd -40(%ebp), %xmm7
addsd -80(%ebp), %xmm0        mulsd (%esi,%eax,8), %xmm7
movsd (%edx,%eax,8), %xmm5    addsd %xmm7, %xmm5
movsd %xmm0, -80(%ebp)        movsd -40(%ebp), %xmm7
incl %eax                     mulsd (%edi,%eax,8), %xmm7
movapd %xmm1, %xmm0           addsd %xmm7, %xmm4
cmpl %eax, %edi               movsd -56(%ebp), %xmm7
mulsd %xmm4, %xmm0            mulsd (%ebx,%eax,8), %xmm7
mulsd %xmm6, %xmm1            addsd %xmm7, %xmm3
addsd -72(%ebp), %xmm0        movsd -56(%ebp), %xmm7
addsd -64(%ebp), %xmm1        mulsd (%esi,%eax,8), %xmm7
movsd %xmm0, -72(%ebp)        addsd %xmm7, %xmm2
movsd %xmm1, -64(%ebp)        movsd -56(%ebp), %xmm7
movapd %xmm2, %xmm0           mulsd (%edi,%eax,8), %xmm7
mulsd %xmm3, %xmm0            addsd %xmm7, %xmm1
mulsd %xmm5, %xmm3            movsd (%ecx,%eax,8), %xmm7
addsd -56(%ebp), %xmm0        mulsd (%ebx,%eax,8), %xmm7
addsd -32(%ebp), %xmm3        addsd -32(%ebp), %xmm7
movsd %xmm0, -56(%ebp)        movsd %xmm7, -32(%ebp)
movsd %xmm3, -32(%ebp)        movsd (%ecx,%eax,8), %xmm7
movapd %xmm2, %xmm0           mulsd (%esi,%eax,8), %xmm7
mulsd %xmm6, %xmm2            addsd -24(%ebp), %xmm7
mulsd %xmm4, %xmm0            movsd %xmm7, -24(%ebp)
addsd -40(%ebp), %xmm2        movsd (%ecx,%eax,8), %xmm7
mulsd %xmm5, %xmm4            mulsd (%edi,%eax,8), %xmm7
addsd -48(%ebp), %xmm0        incl %eax
addsd -24(%ebp), %xmm4        addsd %xmm7, %xmm0
mulsd %xmm6, %xmm5            cmpl %eax, 8(%ebp)
movsd %xmm0, -48(%ebp)        jg .L4
movsd %xmm2, -40(%ebp)
movsd %xmm4, -24(%ebp)
addsd %xmm5, %xmm7
jg .L4
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37364