This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
optimization/6880: Inlining inefficiencies
- From: Dan Nicolaescu <dann at godzilla dot ics dot uci dot edu>
- To: gcc-gnats at gcc dot gnu dot org
- Date: 31 May 2002 07:33:47 -0000
- Subject: optimization/6880: Inlining inefficiencies
- Reply-to: Dan Nicolaescu <dann at godzilla dot ics dot uci dot edu>
>Number: 6880
>Category: optimization
>Synopsis: Inlining inefficiencies
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: unassigned
>State: open
>Class: pessimizes-code
>Submitter-Id: net
>Arrival-Date: Fri May 31 00:36:01 PDT 2002
>Closed-Date:
>Last-Modified:
>Originator: Dan Nicolaescu <dann@godzilla.ics.uci.edu>
>Release: gcc-3.1
>Organization:
>Environment:
sparc-sun-solaris2.7
>Description:
There are some problems with inlining as shown by the code below
(derived from oopack)
// Minimal complex-number type with double components, used by the
// floating-point half of this test case.
class Complex_d {
public:
// Real and imaginary parts; public so operator+ can read them directly.
double re, im;
// Initializes re from r and im from i.
Complex_d (double r, double i) : re(r), im(i) {}
// Default constructor: performs no member initialization.
Complex_d () {}
};
// Component-wise sum of two Complex_d values.
// Both operands are taken by value and the result is returned by value.
inline Complex_d operator+ (Complex_d a, Complex_d b)
{
return Complex_d (a.re+b.re, a.im+b.im);
}
// Global destination, so the result of foo() is observable.
Complex_d Zd;
// Adds a constant Complex_d to itself through the inline operator+ and
// stores the sum in Zd.
void
foo (void)
{
// Both components are compile-time constants.
Complex_d factor (123.2374, 428.234);
// You'd think this function would be optimized to just do a couple of
// stores? Nope.
Zd = factor + factor;
}
// Integer counterpart of Complex_d: same layout and constructors, with
// int components instead of double.
class Complex_i {
public:
// Real and imaginary parts; public so operator+ can read them directly.
int re, im;
// Initializes re from r and im from i.
Complex_i (int r, int i) : re(r), im(i) {}
// Default constructor: performs no member initialization.
Complex_i () {}
};
// Component-wise sum of two Complex_i values.
// Both operands are taken by value and the result is returned by value.
inline Complex_i operator+ (Complex_i a, Complex_i b)
{
return Complex_i ( a.re+b.re, a.im+b.im);
}
// Global destination, so the result of bar() is observable.
Complex_i Zi;
// Integer version of foo(): adds a constant Complex_i to itself through
// the inline operator+ and stores the sum in Zi.
void
bar (void)
{
// Both components are compile-time constants.
Complex_i factor (123, 428);
Zi = factor + factor;
}
// Same addition as bar(), but the sum is assigned back to the local
// object, so nothing is written to a global.
void
foobar (void)
{
Complex_i factor (123, 428);
factor = factor + factor;
}
The SPARC assembly generated by gcc-3.1 -O3 is:
! Mangled name of foo(); code emitted by gcc-3.1 -O3.
_Z3foov:
.LLFB2:
!#PROLOGUE# 0
! Open a new register window and reserve a 176-byte stack frame.
save %sp, -176, %sp
.LLCFI0:
!#PROLOGUE# 1
! Load the double at .LLC0 into %f14 and spill it to [%fp-32].
! (.LLC0/.LLC1 are presumably the two constants of "factor"; the constant
! pool is not shown in this listing.)
sethi %hi(.LLC0), %i0
ldd [%i0+%lo(.LLC0)], %f14
sethi %hi(.LLC1), %g1
std %f14, [%fp-32]
! Reload that value into an integer register pair and store two copies of
! it; the .LLC1 value gets the same treatment, interleaved below.
ldd [%fp-32], %i2
sethi %hi(Zd), %i4
ldd [%g1+%lo(.LLC1)], %f2
std %i2, [%fp-80]
std %i2, [%fp-64]
std %f2, [%fp-24]
ldd [%fp-64], %f12
or %i4, %lo(Zd), %g1
ldd [%fp-80], %f4
ldd [%fp-24], %i0
! First addition: both operands trace back to the .LLC0 value.
faddd %f12, %f4, %f8
std %i0, [%fp-72]
std %i0, [%fp-56]
ldd [%fp-56], %f10
ldd [%fp-72], %f6
std %f8, [%fp-48]
! Second addition: both operands trace back to the .LLC1 value.
faddd %f10, %f6, %f0
! The first sum (spilled to [%fp-48] above) is reloaded into an integer
! pair and stored at Zd+0.
ldd [%fp-48], %i0
std %i0, [%i4+%lo(Zd)]
! Same stack round trip for the second sum, which is stored at Zd+8.
std %f0, [%fp-40]
ldd [%fp-40], %i0
std %i0, [%g1+8]
nop
! Return; restore sits in the delay slot of ret.
ret
restore
A couple of things are wrong here:
1. the reserved stack space is too big
2. most of the stores are dead stores but they are not eliminated.
Isn't there enough information to determine that?
3. it looks like arguments to the inlined functions are moved from
memory to integer registers then back to memory then to fp
registers. (side note: the SPARC v8 ABI specifies that fp values
are passed in integer registers, and there is no mov INTREG, FPREG
instruction, but still all this should not be done for an inlined
function)
The code looks a little better for "bar", but there is still a lot of
redundant stack traffic:
! Mangled name of bar(); code emitted by gcc-3.1 -O3.
_Z3barv:
.LLFB3:
!#PROLOGUE# 0
! No save here: %sp is simply lowered by 144 bytes.
add %sp, -144, %sp
.LLCFI1:
!#PROLOGUE# 1
! Materialize 123 and 428 and store them to adjacent stack words.
mov 123, %o3
mov 428, %g1
st %g1, [%sp+124]
st %o3, [%sp+120]
! Reload both words as a pair and write two copies, at [%sp+96] and
! [%sp+104].
ldd [%sp+120], %o2
sethi %hi(Zi), %o5
std %o2, [%sp+96]
std %o2, [%sp+104]
! Read the four words back one at a time and add them pairwise.
ld [%sp+104], %o2
or %o5, %lo(Zi), %g1
ld [%sp+96], %o0
ld [%sp+108], %o3
add %o2, %o0, %o4
ld [%sp+100], %o1
! Store the two sums at Zi+0 and Zi+4.
st %o4, [%o5+%lo(Zi)]
add %o3, %o1, %o2
st %o2, [%g1+4]
nop
! Return; the sub in the delay slot undoes the 144-byte stack adjustment.
retl
sub %sp, -144, %sp
And finally the stack adjustment code is useless for "foobar":
! Mangled name of foobar(); code emitted by gcc-3.1 -O3.
_Z6foobarv:
.LLFB4:
!#PROLOGUE# 0
! %sp is lowered by 144 bytes although no instruction below touches the stack.
add %sp, -144, %sp
.LLCFI2:
!#PROLOGUE# 1
nop
! Return; the sub in the delay slot raises %sp back by 144 bytes.
retl
sub %sp, -144, %sp
I don't know how much of a performance impact all of these have on real
programs, but it seems that any inline function that is passed objects by
value would be affected.
Also see the thread starting at:
http://gcc.gnu.org/ml/gcc/2002-05/msg02821.html
>How-To-Repeat:
>Fix:
>Release-Note:
>Audit-Trail:
>Unformatted: