[Bug tree-optimization/41026] invariant address load inside loop with -Os.
rahul at icerasemi dot com
gcc-bugzilla@gcc.gnu.org
Fri Sep 11 10:04:00 GMT 2009
------- Comment #6 from rahul at icerasemi dot com 2009-09-11 10:03 -------
An interesting regression results as a side effect of loop header copying (this
occurs even in vanilla O2). If I modify my original test case to
struct struct_t {
int* data;
};
void testAddr (struct struct_t* sp, int len)
{
short i;
for (i = 0; i < len; i++)
{
sp->data[len-i-1] = 0;
}
}
The index is now a short, and I have purposefully added an int to form the
final induction variable.
With gcc -S -O2 -fdump-tree-all, I get the following SSA
short int i;
int * D.1220;
long unsigned int D.1219;
long unsigned int D.1218;
long unsigned int D.1217;
int D.1216;
int D.1215;
int * D.1214;
<bb 2>:
goto <bb 4>;
<bb 3>:
D.1214_6 = sp_5(D)->data;
D.1215_7 = (int) i_1;
D.1216_8 = len_4(D) - D.1215_7;
D.1217_9 = (long unsigned int) D.1216_8;
D.1218_10 = D.1217_9 + -1;
D.1219_11 = D.1218_10 * 4;
D.1220_12 = D.1214_6 + D.1219_11;
*D.1220_12 ={v} 0;
i_13 = i_1 + 1;
<bb 4>:
# i_1 = PHI <0(2), i_13(3)>
D.1215_3 = (int) i_1;
if (D.1215_3 < len_4(D))
goto <bb 3>;
else
goto <bb 5>;
<bb 5>:
return;
The following copy propagation and/or FRE passes identify D.1215_7 as a copy of
D.1215_3 and we get
<bb 3>:
D.1214_6 = sp_5(D)->data;
D.1216_8 = len_4(D) - D.1215_3;
D.1217_9 = (long unsigned int) D.1216_8;
D.1218_10 = D.1217_9 + -1;
D.1219_11 = D.1218_10 * 4;
D.1220_12 = D.1214_6 + D.1219_11;
*D.1220_12 = 0;
i_13 = i_1 + 1;
Loop header copying introduces a PHI for D.1215
<bb 2>:
D.1215_19 = 0;
if (D.1215_19 < len_4(D))
goto <bb 3>;
else
goto <bb 4>;
<bb 3>:
# i_20 = PHI <i_13(3), 0(2)>
# D.1215_21 = PHI <D.1215_3(3), D.1215_19(2)>
D.1214_6 = sp_5(D)->data;
D.1216_8 = len_4(D) - D.1215_21;
D.1217_9 = (long unsigned int) D.1216_8;
D.1218_10 = D.1217_9 + -1;
D.1219_11 = D.1218_10 * 4;
D.1220_12 = D.1214_6 + D.1219_11;
*D.1220_12 = 0;
i_13 = i_20 + 1;
D.1215_3 = (int) i_13;
if (D.1215_3 < len_4(D))
goto <bb 3>;
else
goto <bb 4>;
This causes IVOpts (dump below) and all subsequent optimisations to fall over.
<bb 3>:
D.1214_6 = sp_5(D)->data;
D.1238_7 = (unsigned int) len_4(D);
D.1239_1 = D.1238_7 + 0x0ffffffff;
__builtin_loop_start (1, D.1239_1);
D.1241_24 = (unsigned int) len_4(D);
<bb 4>:
# D.1215_21 = PHI <0(3), D.1215_3(5)>
# ivtmp.13_14 = PHI <0(3), ivtmp.13_18(5)>
__builtin_loop_iteration (1);
D.1216_8 = len_4(D) - D.1215_21;
D.1217_9 = (long unsigned int) D.1216_8;
D.1218_10 = D.1217_9 + -1;
D.1219_11 = D.1218_10 * 4;
D.1220_12 = D.1214_6 + D.1219_11;
*D.1220_12 = 0;
D.1240_19 = ivtmp.13_14 + 1;
D.1215_23 = (int) D.1240_19;
D.1215_3 = D.1215_23;
ivtmp.13_18 = ivtmp.13_14 + 1;
if (ivtmp.13_18 != D.1241_24)
goto <bb 5>;
else
goto <bb 6>;
On this test, using -fno-tree-copy-prop -fno-tree-pre results in better
optimizations, implying that copy propagating (across blocks) / FREing
potential induction variables is undesirable. Alternatively, a less ideal
solution is to disable loop header copying when dealing with type-promoted
loop indices.
--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41026
More information about the Gcc-bugs
mailing list