Consider the following C++ code:
--------------------------------
// Minimal complex-number type used by the testcase: two doubles set by the
// constructor.
struct complex {
double re, im;  // real and imaginary parts
// Initializes re from r and im from i.
complex(double r, double i) : re(r), im(i) {}
};
// Component-wise addition: returns a new complex, built by value, whose
// fields are the sums of the corresponding fields of a and b.
inline complex operator+(const complex& a, const complex& b) {
return complex(a.re+b.re, a.im+b.im);
}
// Returns arg + (1, 0). The right-hand operand is a temporary complex
// constructed from the constants 1 and 0 and passed to operator+ by
// const reference.
complex addone(const complex& arg) {
return arg + complex(1,0);
}
-------------------------------
We generate really poor code for this in all GCC versions, including
tree-ssa.
The reason is that we build a temporary struct to hold the constants 1
and 0 and never get rid of it, so we take no advantage of the fact that
the imaginary part being added is zero.
If the source file is named foo.C, the SSA dump foo.C.t09.ssa contains:
;; Function complex addone(const complex&) (_Z6addoneRK7complex)
complex addone(const complex&) (arg)
{
struct complex retval.12;
struct complex <UVa150>;
struct complex * T.8;
struct complex * T.9;
struct complex & T.10;
struct complex T.11;
{
T.8_2 = &<UVa150>;
{
double i;
double r;
struct complex * const this;
this_3 = (struct complex * const)T.8_2;
r_5 = 1.0e+0;
i_6 = 0.0;
{
this->re = 1.0e+0;
this->im = 0.0;
{
(void)0
}
}
};
T.9_9 = &<UVa150>;
T.10_10 = (struct complex &)T.9_9;
{
struct complex & b;
struct complex <UVa770>;
b_11 = T.10_10;
{
struct complex * T.1;
double T.2;
double T.3;
double T.4;
double T.5;
double T.6;
double T.7;
{
T.1_13 = &<UVa770>;
T.2_15 = arg->re;
T.3_16 = b->re;
T.4_17 = T.2_15 + T.3_16;
T.5_18 = arg->im;
T.6_19 = b->im;
T.7_20 = T.5_18 + T.6_19;
{
double i;
double r;
struct complex * const this;
this_21 = (struct complex * const)T.1_13;
r_23 = T.4_17;
i_24 = T.7_20;
{
this->re = T.4_17;
this->im = T.7_20;
{
(void)0
}
}
};
{
(void)0;
goto <ULa700>;
}
}
};
<ULa700>:;;
retval.12_27 = <UVa770>
};
T.11_28 = retval.12_27;
return retval.12_27;
}
}
------------------------------------------------------------
It would seem simple enough to eliminate all uses of the temporary
struct <UVa150> by doing copy propagation: the only thing left would be
the initialization of the struct itself, and every use of the struct
would get the values 1.0 and 0.0 directly instead of loading them from
its re and im fields. At that point the temporary struct has no
remaining uses and should be eligible for removal.
If we could do just this, we would greatly improve C++ performance,
especially on things like the Boost Graph Library. It seems that we
already have most of what we need in place, right?